From 8e871494198a093b255486f3a45c39087f4ec3c4 Mon Sep 17 00:00:00 2001
From: Dustin Wilson
Date: Fri, 14 Sep 2018 14:16:07 -0500
Subject: [PATCH] Daily TreeBuilder additions

---
 lib/DOM/Element.php       | 103 ++++++++++++++++++++++++++++++++++++++
 lib/OpenElementsStack.php |  70 ++++++++++++++++++++++++---
 lib/TreeBuilder.php       |  60 ++++++++++++++++++----
 3 files changed, 220 insertions(+), 13 deletions(-)

Review notes (this section is ignored when the patch is applied):
 * OpenElementsStack::hasElementInScope() now dispatches to isInListItemScope()
   (the previous name, isInListScope, is not defined anywhere in this patch)
   and follows the "has an element in a specific scope" steps: match on the
   target element, fail on the first scope boundary.
 * in_array()/array_search() in generateImpliedEndTags() compare strictly.
 * This copy was re-flowed from a whitespace-collapsed source, so indentation
   is a best-effort reconstruction and the index hashes below are stale; use
   `git apply --ignore-whitespace` if the context does not match.

diff --git a/lib/DOM/Element.php b/lib/DOM/Element.php
index f3a419b..08eef00 100644
--- a/lib/DOM/Element.php
+++ b/lib/DOM/Element.php
@@ -135,4 +135,107 @@ class Element extends \DOMElement {
 
         return $s;
     }
+
+    # Tells whether this element is one of the element types that bound list item
+    # scope; it does not itself search the stack of open elements.
+    public function isInListItemScope(): bool {
+        $name = $this->name;
+        $ns = $this->namespaceURI;
+
+        # The stack of open elements is said to have a particular element in list item
+        # scope when it has that element in the specific scope consisting of the
+        # following element types:
+        #
+        # All the element types listed above for the has an element in scope
+        # algorithm.
+        # ol in the HTML namespace
+        # ul in the HTML namespace
+
+        return $this->isInScope() || ($ns === '' && ($name === 'ol' || $name === 'ul'));
+    }
+
+    # Tells whether this element is one of the element types that bound button
+    # scope; it does not itself search the stack of open elements.
+    public function isInButtonScope(): bool {
+        $name = $this->name;
+        $ns = $this->namespaceURI;
+
+        # The stack of open elements is said to have a particular element in button
+        # scope when it has that element in the specific scope consisting of the
+        # following element types:
+        #
+        # All the element types listed above for the has an element in scope
+        # algorithm.
+        # button in the HTML namespace
+
+        return $this->isInScope() || ($ns === '' && $name === 'button');
+    }
+
+    # Tells whether this element is one of the element types that bound table
+    # scope; it does not itself search the stack of open elements.
+    public function isInTableScope(): bool {
+        $name = $this->name;
+
+        # The stack of open elements is said to have a particular element in table scope
+        # when it has that element in the specific scope consisting of the following
+        # element types:
+        #
+        # html in the HTML namespace
+        # table in the HTML namespace
+        # template in the HTML namespace
+
+        return $this->namespaceURI === '' && ($name === 'html' || $name === 'table' || $name === 'template');
+    }
+
+    # Tells whether this element is one of the element types that bound select
+    # scope, i.e. anything other than an HTML optgroup or option element.
+    public function isInSelectScope(): bool {
+        $name = $this->name;
+
+        # The stack of open elements is said to have a particular element in select
+        # scope when it has that element in the specific scope consisting of all element
+        # types except the following:
+        #
+        # optgroup in the HTML namespace
+        # option in the HTML namespace
+
+        return !($this->namespaceURI === '' && ($name === 'optgroup' || $name === 'option'));
+    }
+
+    # Tells whether this element is one of the element types that bound the
+    # default scope shared by the list item and button scopes.
+    # NOTE(review): this relies on $this->name resolving to the tag name and on
+    # HTML elements reporting '' as their namespace URI; neither is visible in
+    # this patch -- confirm against the rest of the Element class.
+    protected function isInScope(): bool {
+        $name = $this->name;
+        $ns = $this->namespaceURI;
+
+        # The stack of open elements is said to have a particular element in scope when
+        # it has that element in the specific scope consisting of the following element
+        # types:
+        #
+        # applet
+        # caption
+        # html
+        # table
+        # td
+        # th
+        # marquee
+        # object
+        # template
+        # MathML mi
+        # MathML mo
+        # MathML mn
+        # MathML ms
+        # MathML mtext
+        # MathML annotation-xml
+        # SVG foreignObject
+        # SVG desc
+        # SVG title
+
+        return ($ns === '' && ($name === 'applet' || $name === 'caption' || $name === 'html' || $name === 'table' || $name === 'td' || $name === 'th' || $name === 'marquee' || $name === 'object' || $name === 'template')) ||
+               ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) ||
+               ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title'));
+    }
 }
diff --git a/lib/OpenElementsStack.php b/lib/OpenElementsStack.php
index e0fdcb3..ef4f2f4 100644
--- a/lib/OpenElementsStack.php
+++ b/lib/OpenElementsStack.php
@@ -26,19 +26,19 @@ class OpenElementsStack extends Stack {
         }
 
         if ($needle instanceof DOMElement) {
-            foreach (array_reverse($this->_storage) as $key=>$value) {
+            foreach (array_reverse($this->_storage) as $key => $value) {
                 if ($value->isSameNode($needle)) {
                     return $key;
                 }
             }
         } elseif (is_string($needle)) {
-            foreach (array_reverse($this->_storage) as $key=>$value) {
+            foreach (array_reverse($this->_storage) as $key => $value) {
                 if ($value->nodeName === $needle) {
                     return $key;
                 }
             }
         } elseif ($needle instanceof \Closure) {
-            foreach (array_reverse($this->_storage) as $key=>$value) {
+            foreach (array_reverse($this->_storage) as $key => $value) {
                 if ($needle($value) === true) {
                     return $key;
                 }
@@ -48,14 +48,74 @@ class OpenElementsStack extends Stack {
         return -1;
     }
 
-    public function generateImpliedEndTags() {
+    # Pops elements off the stack while the current node is one of the
+    # implied-end-tag elements; $exclude names one tag to leave in place.
+    public function generateImpliedEndTags(string $exclude = null) {
+        $tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
+
+        if (!is_null($exclude)) {
+            $key = array_search($exclude, $tags, true);
+            if ($key !== false) {
+                unset($tags[$key]);
+                $tags = array_values($tags);
+            }
+        }
+
         $currentNodeName = end($this->_storage)->nodeName;
-        while ($currentNodeName === 'caption' || $currentNodeName === 'colgroup' || $currentNodeName === 'dd' || $currentNodeName === 'dt' || $currentNodeName === 'li' || $currentNodeName === 'optgroup' || $currentNodeName === 'option' || $currentNodeName === 'p' || $currentNodeName === 'rb' || $currentNodeName === 'rp' || $currentNodeName === 'rt' || $currentNodeName === 'rtc' || $currentNodeName === 'tbody' || $currentNodeName === 'td' || $currentNodeName === 'tfoot' || $currentNodeName === 'th' || $currentNodeName === 'thead' || $currentNodeName === 'tr') {
+        while (in_array($currentNodeName, $tags, true)) {
             $this->pop();
             $currentNodeName = end($this->_storage)->nodeName;
         }
     }
 
+    public function hasElementInListItemScope(string $elementName): bool {
+        return $this->hasElementInScope($elementName, 0);
+    }
+
+    public function hasElementInButtonScope(string $elementName): bool {
+        return $this->hasElementInScope($elementName, 1);
+    }
+
+    public function hasElementInTableScope(string $elementName): bool {
+        return $this->hasElementInScope($elementName, 2);
+    }
+
+    public function hasElementInSelectScope(string $elementName): bool {
+        return $this->hasElementInScope($elementName, 3);
+    }
+
+    # Walks the stack from the current node downwards and reports whether an
+    # element named $elementName is reached before a boundary element of the
+    # requested scope; $type selects the scope (0 list item, 1 button, 2 table,
+    # 3 select).
+    protected function hasElementInScope(string $elementName, int $type): bool {
+        switch ($type) {
+            case 0: $func = 'isInListItemScope';
+            break;
+            case 1: $func = 'isInButtonScope';
+            break;
+            case 2: $func = 'isInTableScope';
+            break;
+            case 3: $func = 'isInSelectScope';
+            break;
+            default: return false;
+        }
+
+        foreach (array_reverse($this->_storage) as $node) {
+            # If node is the target node, terminate in a match state.
+            if ($node->nodeName === $elementName) {
+                return true;
+            }
+            # Otherwise, if node is one of the element types in the scope's list,
+            # terminate in a failure state.
+            if ($node->$func()) {
+                return false;
+            }
+        }
+
+        return false;
+    }
+
     public function __get($property) {
         $value = parent::__get($property);
         if (!is_null($value)) {
diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php
index 5cefe5b..76087f5 100644
--- a/lib/TreeBuilder.php
+++ b/lib/TreeBuilder.php
@@ -1029,14 +1029,6 @@ class TreeBuilder {
                             }
                         }
                     }
-                    # A start tag whose tag name is one of: "address", "article", "aside",
-                    # "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
-                    # "figcaption", "figure", "footer", "header", "main", "nav", "ol", "p",
-                    # "section", "summary", "ul"
-                    elseif ($token->name === 'address' || $token->name === 'article' || $token->name === 'aside' || $token->name === 'blockquote' || $token->name === 'center' || $token->name === 'details' || $token->name === 'dialog' || $token->name === 'dir' || $token->name === 'div' || $token->name === 'dl' || $token->name === 'fieldset' || $token->name === 'figcaption' || $token->name === 'figure' || $token->name === 'footer' || $token->name === 'header' || $token->name === 'main' || $token->name === 'nav' || $token->name === 'ol' || $token->name === 'p' || $token->name === 'section' || $token->name === 'summary' || $token->name === 'ul') {
-                        # If the stack of open elements has a p element in button scope, then close a p
-                        # element.
-                    }
                 }
                 # A start tag whose tag name is "frameset"
                 elseif ($token->name === 'frameset') {
@@ -1067,7 +1059,41 @@ class TreeBuilder {
                         $this->insertionMode = self::IN_FRAMESET_MODE;
                     }
                 }
+                # A start tag whose tag name is one of: "address", "article", "aside",
+                # "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
+                # "figcaption", "figure", "footer", "header", "main", "nav", "ol", "p",
+                # "section", "summary", "ul"
+                elseif ($token->name === 'address' || $token->name === 'article' || $token->name === 'aside' || $token->name === 'blockquote' || $token->name === 'center' || $token->name === 'details' || $token->name === 'dialog' || $token->name === 'dir' || $token->name === 'div' || $token->name === 'dl' || $token->name === 'fieldset' || $token->name === 'figcaption' || $token->name === 'figure' || $token->name === 'footer' || $token->name === 'header' || $token->name === 'main' || $token->name === 'nav' || $token->name === 'ol' || $token->name === 'p' || $token->name === 'section' || $token->name === 'summary' || $token->name === 'ul') {
+                    # If the stack of open elements has a p element in button scope, then close a p
+                    # element.
+                    if ($this->stack->hasElementInButtonScope('p')) {
+                        $this->closePElement();
+                    }
+
+                    # Insert an HTML element for the token.
+                    $this->insertStartTagToken($token);
+                }
+                # A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
+                elseif ($token->name === 'h1' || $token->name === 'h2' || $token->name === 'h3' || $token->name === 'h4' || $token->name === 'h5' || $token->name === 'h6') {
+                    # If the stack of open elements has a p element in button scope, then close a p
+                    # element.
+                    if ($this->stack->hasElementInButtonScope('p')) {
+                        $this->closePElement();
+                    }
+
+                    # If the current node is an HTML element whose tag name is one of "h1", "h2",
+                    # "h3", "h4", "h5", or "h6", then this is a parse error; pop the current node
+                    # off the stack of open elements.
+                    $currentNodeName = $this->stack->currentNodeName;
+                    $currentNodeNamespace = $this->stack->currentNodeNamespace;
+                    if ($currentNodeNamespace === '' && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) {
+                        ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, $currentNodeName . ' content or end tag');
+                        $this->stack->pop();
+                    }
 
+                    # Insert an HTML element for the token.
+                    $this->insertStartTagToken($token);
+                }
             }
             elseif ($token instanceof EndTagToken) {
                 # An end tag whose tag name is "template"
@@ -2026,4 +2052,22 @@ class TreeBuilder {
             # 18. Return to the step labeled Loop.
         }
     }
+
+    protected function closePElement() {
+        # When the steps above say the UA is to close a p element, it means that the UA
+        # must run the following steps:
+
+        # 1. Generate implied end tags, except for p elements.
+        $this->stack->generateImpliedEndTags('p');
+        # 2. If the current node is not a p element, then this is a parse error.
+        $currentNodeName = $this->stack->currentNodeName;
+        if ($currentNodeName !== 'p') {
+            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $currentNodeName, (string)$this->stack . ' end tag');
+        }
+        # 3. Pop elements from the stack of open elements until a p element has been
+        # popped from the stack.
+        do {
+            $poppedNodeName = $this->stack->pop()->nodeName;
+        } while ($poppedNodeName !== 'p');
+    }
 }
\ No newline at end of file