Browse Source

Another daily TreeBuilder

ns
Dustin Wilson 6 years ago
parent
commit
b4c3c08800
  1. 6
      lib/Exception.php
  2. 94
      lib/OpenElementsStack.php
  3. 117
      lib/TreeBuilder.php

6
lib/Exception.php

@ -11,6 +11,8 @@ class Exception extends \Exception {
const STACK_INVALID_INDEX = 10201; const STACK_INVALID_INDEX = 10201;
const STACK_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10202; const STACK_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10202;
const STACK_ELEMENT_STRING_ARRAY_EXPECTED = 10203;
const STACK_STRING_ARRAY_EXPECTED = 10204;
const DATA_NODATA = 10301; const DATA_NODATA = 10301;
const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10302; const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10302;
@ -33,7 +35,9 @@ class Exception extends \Exception {
10101 => 'Non-empty Document supplied as argument for Parser', 10101 => 'Non-empty Document supplied as argument for Parser',
10201 => '%s is an invalid Stack index', 10201 => '%s is an invalid Stack index',
10202 => 'Element, Document, or DOMDocumentFragment expected for fragment context; found %s', 10202 => 'Element, Document, or DOMDocumentFragment expected for fragment context',
// Message for Exception::STACK_ELEMENT_STRING_ARRAY_EXPECTED (10203).
10203 => 'Element, string, or array expected',
// Message for Exception::STACK_STRING_ARRAY_EXPECTED (10204). This entry must
// not reuse key 10203: a repeated key in an array literal silently replaces
// the earlier entry and would leave 10204 without any message.
10204 => 'String or array expected',
10301 => 'Data string expected; found %s', 10301 => 'Data string expected; found %s',
10302 => '%s is an invalid data consumption length; a value of 1 or above is expected', 10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',

94
lib/OpenElementsStack.php

@ -13,13 +13,31 @@ class OpenElementsStack extends Stack {
// too. // too.
if ((!is_null($fragmentContext) && !$fragmentContext instanceof DOMDocumentFragment && !$fragmentContext instanceof DOMDocument && !$fragmentContext instanceof DOMElement) || if ((!is_null($fragmentContext) && !$fragmentContext instanceof DOMDocumentFragment && !$fragmentContext instanceof DOMDocument && !$fragmentContext instanceof DOMElement) ||
(is_null($fragmentContext) && $fragmentCase)) { (is_null($fragmentContext) && $fragmentCase)) {
throw new Exception(Exception::STACK_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($fragmentContext)); throw new Exception(Exception::STACK_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED);
} }
$this->fragmentCase = $fragmentCase; $this->fragmentCase = $fragmentCase;
$this->fragmentContext = $fragmentContext; $this->fragmentContext = $fragmentContext;
} }
/**
 * Pops nodes off the stack until a node matching $target has been popped.
 *
 * The matching node itself is also removed from the stack.
 *
 * @param Element|string|array $target Either an Element (compared with
 *     isSameNode()), a node name, or an array of node names, any one of which
 *     ends the popping.
 * @throws Exception With code STACK_ELEMENT_STRING_ARRAY_EXPECTED when $target
 *     is not an Element, a string, or an array. Nothing is popped in that case.
 */
public function popUntil($target) {
    // NOTE(review): every loop below also stops when pop() yields null. This
    // assumes pop() returns null once the stack is empty -- TODO confirm against
    // Stack::pop(). Without the check a target that is not on the stack would
    // dereference null (and, for the name comparisons, never terminate).
    if ($target instanceof Element) {
        do {
            // pop() has to be invoked as a method; `$this->pop` would only read
            // a property and never remove anything from the stack.
            $node = $this->pop();
        } while (!is_null($node) && !$node->isSameNode($target));
    } elseif (is_string($target)) {
        do {
            $node = $this->pop();
        } while (!is_null($node) && $node->nodeName !== $target);
    } elseif (is_array($target)) {
        do {
            $node = $this->pop();
            // Strict comparison so node names are only ever matched as strings.
        } while (!is_null($node) && !in_array($node->nodeName, $target, true));
    } else {
        throw new Exception(Exception::STACK_ELEMENT_STRING_ARRAY_EXPECTED);
    }
}
public function search($needle): int { public function search($needle): int {
if (!$needle) { if (!$needle) {
return -1; return -1;
@ -48,13 +66,28 @@ class OpenElementsStack extends Stack {
return -1; return -1;
} }
public function generateImpliedEndTags(string $exclude = null) { public function generateImpliedEndTags($exclude = []) {
$tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']; $tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
if (!is_null($exclude)) { if (is_string($exclude)) {
$key = array_search($exclude, $tags); $exclude = [$exclude];
if ($key !== false) { }
unset($tags[$key]);
if (!is_array($exclude)) {
throw new Exception(Exception::STACK_STRING_ARRAY_EXPECTED);
}
if (count($exclude) > 0) {
$modified = false;
foreach ($exclude as $e) {
$key = array_search($e, $tags);
if ($key !== false) {
unset($tags[$key]);
$modified = true;
}
}
if ($modified) {
$tags = array_values($tags); $tags = array_values($tags);
} }
} }
@ -66,39 +99,58 @@ class OpenElementsStack extends Stack {
} }
} }
public function hasElementInListItemScope(string $elementName): bool { public function hasElementInScope(string $target): bool {
return $this->hasElementInScope($elementName, 0); return $this->hasElementInScopeHandler($target);
}
public function hasElementInListItemScope(string $target): bool {
return $this->hasElementInScopeHandler($target, 1);
} }
public function hasElementInButtonScope(string $elementName): bool { public function hasElementInButtonScope(string $target): bool {
return $this->hasElementInScope($elementName, 1); return $this->hasElementInScopeHandler($target, 2);
} }
public function hasElementInTableScope(string $elementName): bool { public function hasElementInTableScope(string $target): bool {
return $this->hasElementInScope($elementName, 2); return $this->hasElementInScopeHandler($target, 3);
} }
public function hasElementInSelectScope(string $elementName): bool { public function hasElementInSelectScope(string $target): bool {
return $this->hasElementInScope($elementName, 3); return $this->hasElementInScopeHandler($target, 4);
} }
protected function hasElementInScope(string $elementName, int $type): bool { protected function hasElementInScopeHandler(string $target, int $type = 0): bool {
switch ($type) { switch ($type) {
case 0: $func = 'isElementInListScope'; case 0: $func = 'isElementInScope';
break;
case 1: $func = 'isElementInListScope';
break; break;
case 1: $func = 'isElementInButtonScope'; case 2: $func = 'isElementInButtonScope';
break; break;
case 2: $func = 'isElementInTableScope'; case 3: $func = 'isElementInTableScope';
break; break;
case 3: $func = 'isElementInSelectScope'; case 4: $func = 'isElementInSelectScope';
break; break;
default: return false; default: return false;
} }
foreach (array_reverse($this->_storage) as $key => $value) { # 1. Initialize node to be the current node (the bottommost node of the stack).
if ($this->$func($value)) { // Handled by loop.
foreach (array_reverse($this->_storage) as $node) {
# 2. If node is the target node, terminate in a match state.
if ($node->nodeName === $target) {
return true; return true;
} }
# 3. Otherwise, if node is one of the element types in list, terminate in a
# failure state.
elseif ($this->$func($node)) {
return false;
}
# Otherwise, set node to the previous entry in the stack of open elements and
# return to step 2. (This will never fail, since the loop will always terminate
# in the previous step if the top of the stack — an html element — is reached.)
// Handled by loop.
} }
return false; return false;

117
lib/TreeBuilder.php

@ -703,9 +703,7 @@ class TreeBuilder {
} }
# 3. Pop elements from the stack of open elements until a template element has been popped from the stack. # 3. Pop elements from the stack of open elements until a template element has been popped from the stack.
do { $this->stack->popUntil('template');
$poppedNodeName = $this->stack->pop()->nodeName;
} while ($poppedNodeName !== 'template');
# 4. Clear the list of active formatting elements up to the last marker. # 4. Clear the list of active formatting elements up to the last marker.
$this->activeFormattingElementsList->clearToTheLastMarker(); $this->activeFormattingElementsList->clearToTheLastMarker();
@ -1154,6 +1152,7 @@ class TreeBuilder {
elseif ($token->name === 'li') { elseif ($token->name === 'li') {
# 1. Set the frameset-ok flag to "not ok". # 1. Set the frameset-ok flag to "not ok".
$this->framesetOk = false; $this->framesetOk = false;
# 2. Initialize node to be the current node (the bottommost node of the stack). # 2. Initialize node to be the current node (the bottommost node of the stack).
# 3. Loop: If node is an li element, then run these substeps: # 3. Loop: If node is an li element, then run these substeps:
for ($i = $this->stack->length - 1; $i >= 0; $i--) { for ($i = $this->stack->length - 1; $i >= 0; $i--) {
@ -1165,16 +1164,13 @@ class TreeBuilder {
$this->stack->generateImpliedEndTags('li'); $this->stack->generateImpliedEndTags('li');
# 2. If the current node is not an li element, then this is a parse error. # 2. If the current node is not an li element, then this is a parse error.
$currentNodeName = $this->stack->currentNodeName; if ($this->stack->currentNodeName !== 'li') {
if ($currentNodeName !== 'li') { ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $nodeName);
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $currentNodeName);
} }
# 3. Pop elements from the stack of open elements until an li element has been # 3. Pop elements from the stack of open elements until an li element has been
# popped from the stack. # popped from the stack.
do { $this->stack->popUntil('li');
$poppedNodeName = $this->stack->pop()->nodeName;
} while ($poppedNodeName !== 'li');
# 4. Jump to the step labeled Done below. # 4. Jump to the step labeled Done below.
break; break;
@ -1182,7 +1178,7 @@ class TreeBuilder {
# 4. If node is in the special category, but is not an address, div, or p # 4. If node is in the special category, but is not an address, div, or p
# element, then jump to the step labeled Done below. # element, then jump to the step labeled Done below.
elseif ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) { if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) {
break; break;
} }
@ -1200,6 +1196,96 @@ class TreeBuilder {
# 7. Finally, insert an HTML element for the token. # 7. Finally, insert an HTML element for the token.
$this->insertStartTagToken($token); $this->insertStartTagToken($token);
} }
# A start tag whose tag name is one of: "dd", "dt"
elseif ($token->name === 'dd' || $token->name === 'dt') {
# 1. Set the frameset-ok flag to "not ok".
$this->framesetOk = false;
# 2. Initialize node to be the current node (the bottommost node of the stack).
for ($i = $this->stack->length - 1; $i >= 0; $i--) {
$node = $this->stack[$i];
$nodeName = $node->nodeName;
// Combining these two sets of instructions as they're identical except for the
// element name.
# 3. Loop: If node is a dd element, then run these substeps:
# 4. If node is a dt element, then run these substeps:
if ($nodeName === 'dd' || $nodeName === 'dt') {
# 1. Generate implied end tags, except for dd or dt elements.
$this->stack->generateImpliedEndTags(['dd', 'dt']);
# 2. If the current node is not a dd or dt element, then this is a parse error.
if ($this->stack->currentNodeName !== $nodeName) {
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $nodeName);
}
# 3. Pop elements from the stack of open elements until a dd or dt element has been
# popped from the stack.
$this->stack->popUntil(['dd', 'dt']);
# 4. Jump to the step labeled Done below.
break;
}
# 5. If node is in the special category, but is not an address, div, or p
# element, then jump to the step labeled Done below.
if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) {
break;
}
# 6. Otherwise, set node to the previous entry in the stack of open elements and
# return to the step labeled Loop.
// The loop handles that.
}
# 7. Done: If the stack of open elements has a p element in button scope, then
# close a p element.
if ($this->stack->hasElementInButtonScope('p')) {
$this->closePElement();
}
# 8. Finally, insert an HTML element for the token.
$this->insertStartTagToken($token);
}
# A start tag whose tag name is "plaintext"
elseif ($token->name === 'plaintext') {
# If the stack of open elements has a p element in button scope, then close a p
# element.
if ($this->stack->hasElementInButtonScope('p')) {
$this->closePElement();
}
# Insert an HTML element for the token.
$this->insertStartTagToken($token);
# Switch the tokenizer to the §8.2.4.5 PLAINTEXT state.
$this->tokenizer->state = Tokenizer::PLAINTEXT_STATE;
}
# A start tag whose tag name is "button"
elseif ($token->name === 'button') {
# 1. If the stack of open elements has a button element in scope, then run these
# substeps:
if ($this->stack->hasElementInScope('button')) {
# 1. Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
# 2. Generate implied end tags.
$this->stack->generateImpliedEndTags();
# 3. Pop elements from the stack of open elements until a button element has
# been popped from the stack.
$this->stack->popUntil('button');
}
# 2. Reconstruct the active formatting elements, if any.
$this->activeFormattingElementsList->reconstruct();
# 3. Insert an HTML element for the token.
$this->insertStartTagToken($token);
# 4. Set the frameset-ok flag to "not ok".
$this->framesetOk = false;
}
} }
elseif ($token instanceof EndTagToken) { elseif ($token instanceof EndTagToken) {
# An end tag whose tag name is "template" # An end tag whose tag name is "template"
@ -1668,10 +1754,7 @@ class TreeBuilder {
$count = $this->stack->length - 1; $count = $this->stack->length - 1;
while (true) { while (true) {
if (strtolower($nodeName) === $token->name) { if (strtolower($nodeName) === $token->name) {
do { $this->stack->popUntil($node);
$popped = $this->stack->pop();
} while ($popped !== $node && !is_null($popped));
break; break;
} }
@ -2172,9 +2255,7 @@ class TreeBuilder {
} }
# 3. Pop elements from the stack of open elements until a p element has been # 3. Pop elements from the stack of open elements until a p element has been
# popped from the stack. # popped from the stack.
do { $this->stack->popUntil('p');
$poppedNodeName = $this->stack->pop()->nodeName;
} while ($poppedNodeName !== 'p');
} }
protected function isElementSpecial(Element $element): bool { protected function isElementSpecial(Element $element): bool {
@ -2191,6 +2272,6 @@ class TreeBuilder {
# tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr, # tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr,
# xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML # xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML
# annotation-xml; and SVG foreignObject, SVG desc, and SVG title. # annotation-xml; and SVG foreignObject, SVG desc, and SVG title.
return (($ns === '' && ($name === 'address' || $name === 'applet' || $name === 'area' || $name === 'article' || $name === 'aside' || $name === 'base' || $name === 'basefont' || $name === 'bgsound' || $name === 'blockquote' || $name === 'body' || $name === 'br' || $name === 'button' || $name === 'caption' || $name === 'center' || $name === 'col' || $name === 'colgroup' || $name === 'dd' || $name === 'details' || $name === 'dir' || $name === 'div' || $name === 'dl' || $name === 'dt' || $name === 'embed' || $name === 'fieldset' || $name === 'figcaption' || $name === 'figure' || $name === 'footer' || $name === 'form' || $name === 'frame' || $name === 'frameset' || $name === 'h1' || $name === 'h2' || $name === 'h3' || $name === 'h4' || $name === 'h5' || $name === 'h6' || $name === 'head' || $name === 'header' || $name === 'hr' || $name === 'html' || $name === 'iframe' || $name === 'img' || $name === 'input' || $name === 'li' || $name === 'link' || $name === 'listing' || $name === 'main' || $name === 'marquee' || $name === 'meta' || $name === 'nav' || $name === 'noembed' || $name === 'noframes' || $name === 'noscript' || $name === 'object' || $name === 'ol' || $name === 'p' || $name === 'param' || $name === 'plaintext' || $name === 'pre' || $name === 'script' || $name === 'section' || $name === 'select' || $name === 'source' || $name === 'style' || $name === 'summary' || $name === 'table' || $name === 'tbody' || $name === 'td' || $name === 'template' || $name === 'textarea' || $name === 'tfoot' || $name === 'th' || $name === 'thead' || $name === 'title' || $name === 'tr' || $name === 'track' || $name === 'ul' || $name === 'wbr' || $name === 'xmp')) || ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) || ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title')); return (($ns === '' && ($name === 'address' || $name === 
'applet' || $name === 'area' || $name === 'article' || $name === 'aside' || $name === 'base' || $name === 'basefont' || $name === 'bgsound' || $name === 'blockquote' || $name === 'body' || $name === 'br' || $name === 'button' || $name === 'caption' || $name === 'center' || $name === 'col' || $name === 'colgroup' || $name === 'dd' || $name === 'details' || $name === 'dir' || $name === 'div' || $name === 'dl' || $name === 'dt' || $name === 'embed' || $name === 'fieldset' || $name === 'figcaption' || $name === 'figure' || $name === 'footer' || $name === 'form' || $name === 'frame' || $name === 'frameset' || $name === 'h1' || $name === 'h2' || $name === 'h3' || $name === 'h4' || $name === 'h5' || $name === 'h6' || $name === 'head' || $name === 'header' || $name === 'hr' || $name === 'html' || $name === 'iframe' || $name === 'img' || $name === 'input' || $name === 'li' || $name === 'link' || $name === 'listing' || $name === 'main' || $name === 'marquee' || $name === 'meta' || $name === 'nav' || $name === 'noembed' || $name === 'noframes' || $name === 'noscript' || $name === 'object' || $name === 'ol' || $name === 'p' || $name === 'param' || $name === 'plaintext' || $name === 'pre' || $name === 'script' || $name === 'section' || $name === 'select' || $name === 'source' || $name === 'style' || $name === 'summary' || $name === 'table' || $name === 'tbody' || $name === 'td' || $name === 'template' || $name === 'textarea' || $name === 'tfoot' || $name === 'th' || $name === 'thead' || $name === 'title' || $name === 'tr' || $name === 'track' || $name === 'ul' || $name === 'wbr' || $name === 'xmp')) || ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) || ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title')));
} }
} }
Loading…
Cancel
Save