Review notes for this revision of the patch:

* OpenElementsStack::hasElementInScopeHandler(): the scope boundary test
  (step 3) was the `else` of the target type test, so it never ran for a
  DOMElement or string target and no scope check could end in failure. It now
  runs for every node that is not the target. A null namespaceURI is treated
  as an empty one before it is compared.
* OpenElementsStack::hasElementInTableScope(): table scope is only html, table
  and template; it is no longer an alias of hasElementInScope().
* OpenElementsStack::hasElementInSelectScope(): select scope is every element
  type EXCEPT optgroup and option, so the handler takes an optional
  $matchType flag (default true keeps the previous meaning) to invert the test.
* TreeBuilder, "form" end tag with a template on the stack: the "form in
  scope" condition was missing its negation, and the current node test
  (`!$name !== 'form'`) was always true.

Indentation was reconstructed with four spaces; apply with whitespace
insensitive matching if the tree uses a different indent.

diff --git a/lib/OpenElementsStack.php b/lib/OpenElementsStack.php
index 19d60d5..aa97611 100644
--- a/lib/OpenElementsStack.php
+++ b/lib/OpenElementsStack.php
@@ -66,6 +66,20 @@ class OpenElementsStack extends Stack {
         return -1;
     }
 
+    // Remove an arbitrary element from the array.
+    public function remove($target) {
+        $key = $this->search($target);
+        if ($key === -1) {
+            return;
+        } elseif ($key === count($this->_storage) - 1) {
+            $this->pop();
+            return;
+        }
+
+        unset($this->_storage[$key]);
+        $this->_storage = array_values($this->_storage);
+    }
+
     public function generateImpliedEndTags($exclude = []) {
         $tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
 
@@ -99,154 +113,235 @@ class OpenElementsStack extends Stack {
         }
     }
 
-    public function hasElementInScope(string $target): bool {
-        return $this->hasElementInScopeHandler($target);
-    }
-
-    public function hasElementInListItemScope(string $target): bool {
-        return $this->hasElementInScopeHandler($target, 1);
-    }
-
-    public function hasElementInButtonScope(string $target): bool {
-        return $this->hasElementInScopeHandler($target, 2);
-    }
-
-    public function hasElementInTableScope(string $target): bool {
-        return $this->hasElementInScopeHandler($target, 3);
-    }
-
-    public function hasElementInSelectScope(string $target): bool {
-        return $this->hasElementInScopeHandler($target, 4);
-    }
-
-    protected function hasElementInScopeHandler(string $target, int $type = 0): bool {
-        switch ($type) {
-            case 0: $func = 'isElementInScope';
-            break;
-            case 1: $func = 'isElementInListScope';
-            break;
-            case 2: $func = 'isElementInButtonScope';
-            break;
-            case 3: $func = 'isElementInTableScope';
-            break;
-            case 4: $func = 'isElementInSelectScope';
-            break;
-            default: return false;
-        }
-
-        # 1. Initialize node to be the current node (the bottommost node of the stack).
-        // Handled by loop.
-        foreach (array_reverse($this->_storage) as $node) {
-            # 2. If node is the target node, terminate in a match state.
-            if ($node->nodeName === $target) {
-                return true;
-            }
-            # 3. Otherwise, if node is one of the element types in list, terminate in a
-            # failure state.
-            elseif ($this->$func($node)) {
-                return false;
-            }
-
-            # Otherwise, set node to the previous entry in the stack of open elements and
-            # return to step 2. (This will never fail, since the loop will always terminate
-            # in the previous step if the top of the stack — an html element — is reached.)
-            // Handled by loop.
-        }
+    public function hasElementInScope($target): bool {
+        # The stack of open elements is said to have a particular element in scope when
+        # it has that element in the specific scope consisting of the following element
+        # types:
+        #
+        # applet
+        # caption
+        # html
+        # table
+        # td
+        # th
+        # marquee
+        # object
+        # template
+        # MathML mi
+        # MathML mo
+        # MathML mn
+        # MathML ms
+        # MathML mtext
+        # MathML annotation-xml
+        # SVG foreignObject
+        # SVG desc
+        # SVG title
 
-        return false;
+        $list = [
+            Parser::HTML_NAMESPACE => [
+                'applet',
+                'caption',
+                'html',
+                'table',
+                'td',
+                'th',
+                'marquee',
+                'object',
+                'template'
+            ],
+
+            Parser::MATHML_NAMESPACE => [
+                'mi',
+                'mo',
+                'mn',
+                'ms',
+                'mtext',
+                'annotation-xml'
+            ],
+
+            Parser::SVG_NAMESPACE => [
+                'foreignObject',
+                'desc',
+                'title'
+            ]
+        ];
+
+        return $this->hasElementInScopeHandler($target, $list);
     }
 
-    protected function isElementInListItemScope(Element $element): bool {
-        $name = $element->name;
-        $ns = $element->namespaceURI;
-
-        # The stack of open elements is said to have a particular element in list item
-        # scope when it has that element in the specific scope consisting of the
-        # following element types:
+    public function hasElementInListItemScope($target): bool {
+        # The stack of open elements is said to have a particular element in list item scope when it has that element in the specific scope consisting of the following element types:
         #
-        # All the element types listed above for the has an element in scope
-        # algorithm.
+        # All the element types listed above for the has an element in scope algorithm.
         # ol in the HTML namespace
         # ul in the HTML namespace
 
-        return ($this->isElementInScope($element) || ($ns === '' && ($name === 'ol' || $name === 'ul'))) ? true : false;
+        $list = [
+            Parser::HTML_NAMESPACE => [
+                'applet',
+                'caption',
+                'html',
+                'table',
+                'td',
+                'th',
+                'marquee',
+                'object',
+                'template',
+                'ol',
+                'ul'
+            ],
+
+            Parser::MATHML_NAMESPACE => [
+                'mi',
+                'mo',
+                'mn',
+                'ms',
+                'mtext',
+                'annotation-xml'
+            ],
+
+            Parser::SVG_NAMESPACE => [
+                'foreignObject',
+                'desc',
+                'title'
+            ]
+        ];
+
+        return $this->hasElementInScopeHandler($target, $list);
     }
 
-    protected function isElementInButtonScope(Element $element): bool {
-        $name = $element->name;
-        $ns = $element->namespaceURI;
-
+    public function hasElementInButtonScope($target): bool {
         # The stack of open elements is said to have a particular element in button
         # scope when it has that element in the specific scope consisting of the
         # following element types:
         #
-        # All the element types listed above for the has an element in scope
-        # algorithm.
+        # All the element types listed above for the has an element in scope algorithm.
         # button in the HTML namespace
 
-        return ($this->isElementInScope($element) || ($ns === '' && $name === 'button')) ? true : false;
+        $list = [
+            Parser::HTML_NAMESPACE => [
+                'applet',
+                'caption',
+                'html',
+                'table',
+                'td',
+                'th',
+                'marquee',
+                'object',
+                'template',
+                'button'
+            ],
+
+            Parser::MATHML_NAMESPACE => [
+                'mi',
+                'mo',
+                'mn',
+                'ms',
+                'mtext',
+                'annotation-xml'
+            ],
+
+            Parser::SVG_NAMESPACE => [
+                'foreignObject',
+                'desc',
+                'title'
+            ]
+        ];
+
+        return $this->hasElementInScopeHandler($target, $list);
     }
 
-    protected function isElementInTableScope(Element $element): bool {
-        $name = $element->name;
-
+    public function hasElementInTableScope($target): bool {
         # The stack of open elements is said to have a particular element in table scope
         # when it has that element in the specific scope consisting of the following
         # element types:
         #
         # html in the HTML namespace
         # table in the HTML namespace
         # template in the HTML namespace
 
-        return ($element->namespaceURI === '' && ($name === 'html' || $name === 'table' || $name === 'template')) ? true : false;
+        $list = [
+            Parser::HTML_NAMESPACE => [
+                'html',
+                'table',
+                'template'
+            ]
+        ];
+
+        return $this->hasElementInScopeHandler($target, $list);
     }
 
-    protected function isElementInSelectScope(Element $element): bool {
-        $name = $element->name;
-        $ns = $element->namespaceURI;
-
+    public function hasElementInSelectScope($target): bool {
         # The stack of open elements is said to have a particular element in select
         # scope when it has that element in the specific scope consisting of all element
         # types except the following:
         #
         # optgroup in the HTML namespace
         # option in the HTML namespace
 
-        return ($element->namespaceURI === '' && ($name === 'optgroup' || $name === 'option')) ? false : true;
+        $list = [
+            Parser::HTML_NAMESPACE => [
+                'optgroup',
+                'option'
+            ]
+        ];
+
+        // Select scope is inverted: every element type EXCEPT the listed ones ends the
+        // search, so the handler is told to fail on nodes that are not in the list.
+        return $this->hasElementInScopeHandler($target, $list, false);
     }
 
-    protected function isElementInScope(Element $element): bool {
-        $name = $element->name;
-        $ns = $element->namespaceURI;
-
-        # The stack of open elements is said to have a particular element in scope when
-        # it has that element in the specific scope consisting of the following element
-        # types:
-        #
-        # applet
-        # caption
-        # html
-        # table
-        # td
-        # th
-        # marquee
-        # object
-        # template
-        # MathML mi
-        # MathML mo
-        # MathML mn
-        # MathML ms
-        # MathML mtext
-        # MathML annotation-xml
-        # SVG foreignObject
-        # SVG desc
-        # SVG title
+    // Walk the stack from the current node upwards looking for $target, which is
+    // either a DOMElement (matched by identity) or a node name (matched by string).
+    // $list holds element names keyed by namespace. When $matchType is true a node
+    // found in $list ends the search in failure; when false, any node NOT found in
+    // $list does (needed for select scope).
+    protected function hasElementInScopeHandler($target, array $list, bool $matchType = true): bool {
+        # 1. Initialize node to be the current node (the bottommost node of the stack).
+        // Handled by loop.
+        foreach (array_reverse($this->_storage) as $node) {
+            # 2. If node is the target node, terminate in a match state.
+            if ($target instanceof DOMElement) {
+                if ($node->isSameNode($target)) {
+                    return true;
+                }
+            } elseif (is_string($target) && $node->nodeName === $target) {
+                return true;
+            }
+
+            # 3. Otherwise, if node is one of the element types in list, terminate in a
+            # failure state.
+            // This has to run for every node that was not the target, so it must not be
+            // an else branch of the target type test above.
+            // DOM reports a null namespaceURI for elements created without a namespace.
+            $nodeNamespace = $node->namespaceURI ?? '';
+            $inList = false;
+            foreach ($list as $namespace => $names) {
+                // Entries listed under the HTML namespace are compared against an empty
+                // namespace URI.
+                if ($namespace === Parser::HTML_NAMESPACE) {
+                    $namespace = '';
+                }
+
+                if ($nodeNamespace === $namespace && in_array($node->nodeName, $names, true)) {
+                    $inList = true;
+                    break;
+                }
+            }
+
+            if ($inList === $matchType) {
+                return false;
+            }
+
+            # Otherwise, set node to the previous entry in the stack of open elements and
+            # return to step 2. (This will never fail, since the loop will always terminate
+            # in the previous step if the top of the stack — an html element — is reached.)
+            // Handled by loop.
+        }
 
-        return (($ns === '' && ($name === 'applet' || $name === 'caption' || $name === 'html' || $name === 'table' || $name === 'td' || $name === 'th' || $name === 'marquee' || $name === 'object' || $name === 'template')) ||
-                ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) ||
-                ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title'))) ? true : false;
+        return false;
     }
 
     public function __get($property) {
         $value = parent::__get($property);
         if (!is_null($value)) {
diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php
index 61b916d..75ae5dd 100644
--- a/lib/TreeBuilder.php
+++ b/lib/TreeBuilder.php
@@ -1299,7 +1299,7 @@ class TreeBuilder {
                 elseif ($token->name === 'body' || $token->name === 'html') {
                     # If the stack of open elements does not have a body element in scope, this is a
                     # parse error; ignore the token.
-                    if ($this->stack->search('body') === -1) {
+                    if (!$this->stack->hasElementInScope('body')) {
                         ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body');
                     }
                     # Otherwise, if there is a node in the stack of open elements that is not either
@@ -1359,6 +1359,51 @@ class TreeBuilder {
                         $this->stack->popUntil($token->name);
                     }
                 }
+                # An end tag whose tag name is "form"
+                elseif ($token->name === 'form') {
+                    # If there is no template element on the stack of open elements, then run these
+                    # substeps:
+                    if ($this->stack->search('template') === -1) {
+                        # 1. Let node be the element that the form element pointer is set to, or null if it
+                        # is not set to an element.
+                        $node = $this->formElement;
+                        # 2. Set the form element pointer to null.
+                        $this->formElement = null;
+                        # 3. If node is null or if the stack of open elements does not have node in
+                        # scope, then this is a parse error; return and ignore the token.
+                        if (is_null($node) || !$this->stack->hasElementInScope($node)) {
+                            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
+                            return true;
+                        }
+                        # 4. Generate implied end tags.
+                        $this->stack->generateImpliedEndTags();
+                        # 5. If the current node is not node, then this is a parse error.
+                        if (!$this->stack->currentNode->isSameNode($node)) {
+                            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
+                        }
+                        # 6. Remove node from the stack of open elements
+                        $this->stack->remove($node);
+                    }
+                    # If there is a template element on the stack of open elements, then run these
+                    # substeps instead:
+                    else {
+                        # 1. If the stack of open elements does not have a form element in scope, then
+                        # this is a parse error; return and ignore the token.
+                        if (!$this->stack->hasElementInScope('form')) {
+                            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
+                            return true;
+                        }
+                        # 2. Generate implied end tags.
+                        $this->stack->generateImpliedEndTags();
+                        # 3. If the current node is not a form element, then this is a parse error.
+                        if ($this->stack->currentNodeName !== 'form') {
+                            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
+                        }
+                        # 4. Pop elements from the stack of open elements until a form element has been
+                        # popped from the stack.
+                        $this->stack->popUntil('form');
+                    }
+                }
             }
             # An end-of-file token
             elseif ($token instanceof EOFToken) {