Browse Source

TreeBuilder progress

split-manual
Dustin Wilson 4 years ago
parent
commit
205c56679a
  1. 328
      lib/OpenElementsStack.php
  2. 47
      lib/TreeBuilder.php

328
lib/OpenElementsStack.php

@ -66,6 +66,20 @@ class OpenElementsStack extends Stack {
return -1;
}
// Remove an arbitrary element from the stack, wherever it currently sits.
//
// $target is handed to search() unchanged. When search() reports -1 (no match)
// the stack is left untouched.
public function remove($target) {
    $index = $this->search($target);
    if ($index === -1) {
        return;
    }

    // The last entry is removed through pop() instead of being unset directly.
    $lastIndex = count($this->_storage) - 1;
    if ($index === $lastIndex) {
        $this->pop();
        return;
    }

    // Drop the entry, then reindex so the storage stays a gap-free list.
    unset($this->_storage[$index]);
    $this->_storage = array_values($this->_storage);
}
public function generateImpliedEndTags($exclude = []) {
$tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
@ -99,154 +113,252 @@ class OpenElementsStack extends Stack {
}
}
// Checks the general "in scope" condition for an element named $target.
// Relies on the handler's default scope type (0), which it maps to the
// 'isElementInScope' predicate.
public function hasElementInScope(string $target): bool {
return $this->hasElementInScopeHandler($target);
}
// Checks the "in list item scope" condition for an element named $target.
// Scope type 1 is mapped by the handler to a predicate named
// 'isElementInListScope'.
public function hasElementInListItemScope(string $target): bool {
return $this->hasElementInScopeHandler($target, 1);
}
// Checks the "in button scope" condition for an element named $target.
// Scope type 2 is mapped by the handler to the 'isElementInButtonScope'
// predicate.
public function hasElementInButtonScope(string $target): bool {
return $this->hasElementInScopeHandler($target, 2);
}
// Checks the "in table scope" condition for an element named $target.
// Scope type 3 is mapped by the handler to the 'isElementInTableScope'
// predicate.
public function hasElementInTableScope(string $target): bool {
return $this->hasElementInScopeHandler($target, 3);
}
// Checks the "in select scope" condition for an element named $target.
// Scope type 4 is mapped by the handler to the 'isElementInSelectScope'
// predicate.
public function hasElementInSelectScope(string $target): bool {
return $this->hasElementInScopeHandler($target, 4);
}
protected function hasElementInScopeHandler(string $target, int $type = 0): bool {
switch ($type) {
case 0: $func = 'isElementInScope';
break;
case 1: $func = 'isElementInListScope';
break;
case 2: $func = 'isElementInButtonScope';
break;
case 3: $func = 'isElementInTableScope';
break;
case 4: $func = 'isElementInSelectScope';
break;
default: return false;
}
# 1. Initialize node to be the current node (the bottommost node of the stack).
// Handled by loop.
foreach (array_reverse($this->_storage) as $node) {
# 2. If node is the target node, terminate in a match state.
if ($node->nodeName === $target) {
return true;
}
# 3. Otherwise, if node is one of the element types in list, terminate in a
# failure state.
elseif ($this->$func($node)) {
return false;
}
# Otherwise, set node to the previous entry in the stack of open elements and
# return to step 2. (This will never fail, since the loop will always terminate
# in the previous step if the top of the stack — an html element — is reached.)
// Handled by loop.
}
// Reports whether $target is "in scope" on the stack of open elements.
//
// $target is passed straight through to hasElementInScopeHandler() together
// with the list of element types that terminate the search in a failure state.
public function hasElementInScope($target): bool {
    # The stack of open elements is said to have a particular element in scope when
    # it has that element in the specific scope consisting of the following element
    # types:
    #
    # applet
    # caption
    # html
    # table
    # td
    # th
    # marquee
    # object
    # template
    # MathML mi
    # MathML mo
    # MathML mn
    # MathML ms
    # MathML mtext
    # MathML annotation-xml
    # SVG foreignObject
    # SVG desc
    # SVG title
    // The list must be built and handed to the handler; returning before this
    // point would make every query answer false.
    $list = [
        Parser::HTML_NAMESPACE => [
            'applet',
            'caption',
            'html',
            'table',
            'td',
            'th',
            'marquee',
            'object',
            'template'
        ],
        Parser::MATHML_NAMESPACE => [
            'mi',
            'mo',
            'mn',
            'ms',
            'mtext',
            'annotation-xml'
        ],
        Parser::SVG_NAMESPACE => [
            'foreignObject',
            'desc',
            'title'
        ]
    ];

    return $this->hasElementInScopeHandler($target, $list);
}
protected function isElementInListItemScope(Element $element): bool {
$name = $element->name;
$ns = $element->namespaceURI;
# The stack of open elements is said to have a particular element in list item
# scope when it has that element in the specific scope consisting of the
# following element types:
// Reports whether $target is in "list item scope" on the stack of open elements.
//
// $target is passed straight through to hasElementInScopeHandler() together
// with the general scope list extended by the HTML ol and ul elements.
public function hasElementInListItemScope($target): bool {
    # The stack of open elements is said to have a particular element in list item
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope algorithm.
    # ol in the HTML namespace
    # ul in the HTML namespace
    $list = [
        Parser::HTML_NAMESPACE => [
            'applet',
            'caption',
            'html',
            'table',
            'td',
            'th',
            'marquee',
            'object',
            'template',
            'ol',
            'ul'
        ],
        Parser::MATHML_NAMESPACE => [
            'mi',
            'mo',
            'mn',
            'ms',
            'mtext',
            'annotation-xml'
        ],
        Parser::SVG_NAMESPACE => [
            'foreignObject',
            'desc',
            'title'
        ]
    ];

    return $this->hasElementInScopeHandler($target, $list);
}
protected function isElementInButtonScope(Element $element): bool {
$name = $element->name;
$ns = $element->namespaceURI;
// Reports whether $target is in "button scope" on the stack of open elements.
//
// $target is passed straight through to hasElementInScopeHandler() together
// with the general scope list extended by the HTML button element.
public function hasElementInButtonScope($target): bool {
    # The stack of open elements is said to have a particular element in button
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope algorithm.
    # button in the HTML namespace
    $list = [
        Parser::HTML_NAMESPACE => [
            'applet',
            'caption',
            'html',
            'table',
            'td',
            'th',
            'marquee',
            'object',
            'template',
            'button'
        ],
        Parser::MATHML_NAMESPACE => [
            'mi',
            'mo',
            'mn',
            'ms',
            'mtext',
            'annotation-xml'
        ],
        Parser::SVG_NAMESPACE => [
            'foreignObject',
            'desc',
            'title'
        ]
    ];

    return $this->hasElementInScopeHandler($target, $list);
}
protected function isElementInTableScope(Element $element): bool {
$name = $element->name;
// Reports whether $target is in "table scope" on the stack of open elements.
//
// Table scope is NOT an extension of the general scope list: only html, table
// and template (all in the HTML namespace) terminate the search. Elements such
// as td, th or caption must not act as barriers here, so this cannot be an
// alias for hasElementInScope().
public function hasElementInTableScope($target): bool {
    # The stack of open elements is said to have a particular element in table scope
    # when it has that element in the specific scope consisting of the following
    # element types:
    #
    # html in the HTML namespace
    # table in the HTML namespace
    # template in the HTML namespace
    $list = [
        Parser::HTML_NAMESPACE => [
            'html',
            'table',
            'template'
        ]
    ];

    return $this->hasElementInScopeHandler($target, $list);
}
// Reports whether an element named $target is in "select scope" on the stack
// of open elements.
//
// Select scope is defined by exclusion: every element type terminates the
// search in a failure state EXCEPT HTML optgroup and option. That cannot be
// expressed as a list of terminating types, so the walk is done here directly.
public function hasElementInSelectScope(string $target): bool {
    # The stack of open elements is said to have a particular element in select
    # scope when it has that element in the specific scope consisting of all element
    # types except the following:
    #
    # optgroup in the HTML namespace
    # option in the HTML namespace

    # 1. Initialize node to be the current node (the bottommost node of the stack).
    // Handled by loop.
    foreach (array_reverse($this->_storage) as $node) {
        # 2. If node is the target node, terminate in a match state.
        if ($node->nodeName === $target) {
            return true;
        }

        # 3. Otherwise, if node is one of the element types in list, terminate in a
        # failure state.
        // NOTE(review): HTML elements are assumed to carry either no namespace
        // (null or '') or Parser::HTML_NAMESPACE -- confirm against how the tree
        // builder creates elements.
        $ns = $node->namespaceURI;
        $isHtml = ($ns === null || $ns === '' || $ns === Parser::HTML_NAMESPACE);
        $isTransparent = $isHtml && ($node->nodeName === 'optgroup' || $node->nodeName === 'option');
        if (!$isTransparent) {
            return false;
        }

        # Otherwise, set node to the previous entry in the stack of open elements and
        # return to step 2.
        // Handled by loop.
    }

    return false;
}
protected function isElementInScope(Element $element): bool {
$name = $element->name;
$ns = $element->namespaceURI;
# The stack of open elements is said to have a particular element in scope when
# it has that element in the specific scope consisting of the following element
# types:
#
# applet
# caption
# html
# table
# td
# th
# marquee
# object
# template
# MathML mi
# MathML mo
# MathML mn
# MathML ms
# MathML mtext
# MathML annotation-xml
# SVG foreignObject
# SVG desc
# SVG title
// Shared implementation of the "has an element in the specific scope" walk.
//
// $target  Either a DOMElement (matched by node identity via isSameNode()) or a
//          string (matched against each node's nodeName).
// $list    Map of namespace => element names that terminate the search in a
//          failure state. The Parser::HTML_NAMESPACE key denotes HTML elements.
//
// Returns true when the target is met before any terminating element while
// walking from the current node upwards; false otherwise, including when the
// stack is exhausted.
protected function hasElementInScopeHandler($target, array $list): bool {
    # 1. Initialize node to be the current node (the bottommost node of the stack).
    // Handled by loop.
    foreach (array_reverse($this->_storage) as $node) {
        # 2. If node is the target node, terminate in a match state.
        if ($target instanceof DOMElement) {
            if ($node->isSameNode($target)) {
                return true;
            }
        } elseif (is_string($target)) {
            if ($node->nodeName === $target) {
                return true;
            }
        }

        # 3. Otherwise, if node is one of the element types in list, terminate in a
        # failure state.
        // This must run whenever step 2 did not match, whatever the type of
        // $target; hanging it off an else of the type test would skip it for
        // every DOMElement or string target.
        //
        // HTML is represented as '' for the comparison. A node's namespaceURI is
        // null when it has no namespace, so null and the literal HTML namespace
        // are folded to '' as well.
        // NOTE(review): assumes un-namespaced nodes on this stack are HTML
        // elements -- confirm against how the tree builder creates elements.
        $nodeNamespace = $node->namespaceURI;
        if ($nodeNamespace === null || $nodeNamespace === Parser::HTML_NAMESPACE) {
            $nodeNamespace = '';
        }

        foreach ($list as $namespace => $subList) {
            if ($namespace === Parser::HTML_NAMESPACE) {
                $namespace = '';
            }

            if ($nodeNamespace !== $namespace) {
                continue;
            }

            if (in_array($node->nodeName, $subList, true)) {
                return false;
            }
        }

        # Otherwise, set node to the previous entry in the stack of open elements and
        # return to step 2. (This will never fail, since the loop will always terminate
        # in the previous step if the top of the stack — an html element — is reached.)
        // Handled by loop.
    }

    return false;
}
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {

47
lib/TreeBuilder.php

@ -1299,7 +1299,7 @@ class TreeBuilder {
elseif ($token->name === 'body' || $token->name === 'html') {
# If the stack of open elements does not have a body element in scope, this is a
# parse error; ignore the token.
if ($this->stack->search('body') === -1) {
if (!$this->stack->hasElementInScope('body')) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body');
}
# Otherwise, if there is a node in the stack of open elements that is not either
@ -1359,6 +1359,51 @@ class TreeBuilder {
$this->stack->popUntil($token->name);
}
}
# An end tag whose tag name is "form"
elseif ($token->name === 'form') {
    # If there is no template element on the stack of open elements, then run these
    # substeps:
    if ($this->stack->search('template') === -1) {
        # 1. Let node be the element that the form element pointer is set to, or null if it
        # is not set to an element.
        $node = $this->formElement;
        # 2. Set the form element pointer to null.
        $this->formElement = null;
        # 3. If node is null or if the stack of open elements does not have node in
        # scope, then this is a parse error; return and ignore the token.
        if (is_null($node) || !$this->stack->hasElementInScope($node)) {
            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
            return true;
        }
        # 4. Generate implied end tags.
        $this->stack->generateImpliedEndTags();
        # 5. If the current node is not node, then this is a parse error.
        if (!$this->stack->currentNode->isSameNode($node)) {
            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
        }
        # 6. Remove node from the stack of open elements
        $this->stack->remove($node);
    }
    # If there is a template element on the stack of open elements, then run these
    # substeps instead:
    else {
        # 1. If the stack of open elements does not have a form element in scope, then
        # this is a parse error; return and ignore the token.
        // The error case is the ABSENCE of a form element in scope, hence the negation.
        if (!$this->stack->hasElementInScope('form')) {
            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
            return true;
        }
        # 2. Generate implied end tags.
        $this->stack->generateImpliedEndTags();
        # 3. If the current node is not a form element, then this is a parse error.
        // Compare the name directly; negating it first would compare a boolean
        // with a string and always report an error.
        if ($this->stack->currentNodeName !== 'form') {
            ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
        }
        # 4. Pop elements from the stack of open elements until a form element has been
        # popped from the stack.
        $this->stack->popUntil('form');
    }
}
}
# An end-of-file token
elseif ($token instanceof EOFToken) {

Loading…
Cancel
Save