From c414f3dbfab410173b52897f3b4aed0732fbcd29 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 22 Aug 2018 09:11:07 -0500 Subject: [PATCH] Changed Stack to abstract class Moved the stack of open elements to its own class OpenElementsStack and made it and ActiveFormattingElementsList inherit from an abstract Stack class. --- lib/ActiveFormattingElementsList.php | 46 ++++-------- lib/Exception.php | 32 ++++----- lib/OpenElementsStack.php | 104 +++++++++++++++++++++++++++ lib/Parser.php | 2 +- lib/Stack.php | 93 ++---------------------- lib/Tokenizer.php | 2 +- lib/TreeBuilder.php | 2 +- 7 files changed, 138 insertions(+), 143 deletions(-) create mode 100644 lib/OpenElementsStack.php diff --git a/lib/ActiveFormattingElementsList.php b/lib/ActiveFormattingElementsList.php index 7b156fd..1fd38e5 100644 --- a/lib/ActiveFormattingElementsList.php +++ b/lib/ActiveFormattingElementsList.php @@ -14,17 +14,17 @@ namespace dW\HTML5; # In addition, each element in the list of active formatting elements is # associated with the token for which it was created, so that further elements # can be created for that token if necessary. -class ActiveFormattingElementsList implements \ArrayAccess { +class ActiveFormattingElementsList extends Stack { protected $_storage = []; protected $stack; - public function __construct(Stack $stack) { + public function __construct(OpenElementsStack $stack) { $this->stack = $stack; } public function offsetSet($offset, $value) { - if ($offset < 0) { - throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX); + if ($offset < 0 || $offset > count($this->_storage) - 1) { + throw new Exception(Exception::STACK_INVALID_INDEX); } if (is_null($offset)) { @@ -78,32 +78,10 @@ class ActiveFormattingElementsList implements \ArrayAccess { # 2. Add element to the list of active formatting elements. $this->_storage[] = $value; } else { - $this->_storage[$offset] = $value; + parent::offsetSet($offset, $value); } } - public function offsetExists($offset) { - return isset($this->_storage[$offset]); - } - - public function offsetUnset($offset) { - if ($offset < 0 || $offset > count($this->$storage) - 1) { - throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX); - } - - unset($this->_storage[$offset]); - // Reindex the array. - $this->_storage = array_values($this->_storage); - } - - public function offsetGet($offset) { - if ($offset < 0 || $offset > count($this->$storage) - 1) { - throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX); - } - - return $this->_storage[$offset]; - } - public function insert(StartTagToken $token, \DOMElement $element) { $this->_storage[] = [ 'token' => $token, @@ -115,17 +93,14 @@ class ActiveFormattingElementsList implements \ArrayAccess { $this->offsetSet(null, new ActiveFormattingElementMarker()); } - public function pop() { - return array_pop($this->_storage); - } - public function reconstruct() { # When the steps below require the UA to reconstruct the active formatting # elements, the UA must perform the following steps: // Yes, I know this uses gotos, but here are the reasons for using them: // 1. The spec seems to actively encourage using them, even providing // suggestions on what to name the labels. - // 2. It'd be a pain to program and maintain without them because of this. + // 2. It'd be a pain to program and maintain without them because the algorithm + // jumps around all over the place. # 1. If there are no entries in the list of active formatting elements, then # there is nothing to reconstruct; stop this algorithm. @@ -208,6 +183,11 @@ class ActiveFormattingElementsList implements \ArrayAccess { } public function __get($property) { + $value = parent::__get($property); + if (!is_null($value)) { + return $value; + } + switch ($property) { case 'lastMarker': for ($end = count($this->_storage) - 1, $i = $end; $i >= 0; $i--) { @@ -218,8 +198,6 @@ class ActiveFormattingElementsList implements \ArrayAccess { return false; break; - case 'length': return count($this->_storage); - break; default: return null; } } diff --git a/lib/Exception.php b/lib/Exception.php index 711b558..86e82e9 100644 --- a/lib/Exception.php +++ b/lib/Exception.php @@ -15,18 +15,16 @@ class Exception extends \Exception { const STACK_DOMNODE_ONLY = 10202; const STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10203; - const ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX = 10301; + const DATASTREAM_NODATA = 10301; + const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10302; - const DATASTREAM_NODATA = 10401; - const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10402; + const DOM_DOMDOCUMENT_EXPECTED = 10401; + const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10402; - const DOM_DOMDOCUMENT_EXPECTED = 10501; - const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10502; + const TOKENIZER_INVALID_STATE = 10501; - const TOKENIZER_INVALID_STATE = 10601; - - const TREEBUILDER_FORMELEMENT_EXPECTED = 10701; - const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10702; + const TREEBUILDER_FORMELEMENT_EXPECTED = 10601; + const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10602; protected static $messages = [10000 => 'Invalid error code', 10001 => 'Unknown error; escaping', @@ -40,18 +38,16 @@ class Exception extends \Exception { 10202 => 'Instances of DOMNode are the only types allowed in a Stack', 10203 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected for fragment context; found %s', - 10301 => '%s is an invalid ActiveFormattingElementsList index', - - 10401 => 'Data string expected; found %s', - 10402 => '%s is an invalid data consumption length; a value of 1 or above is expected', + 10301 => 'Data string expected; found %s', + 10302 => '%s is an invalid data consumption length; a value of 1 or above is expected', - 10501 => 'The first argument must be an instance of \DOMElement or null; found %s', - 10502 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s', + 10401 => 'The first argument must be an instance of \DOMElement or null; found %s', + 10402 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s', - 10601 => 'The Tokenizer has entered an invalid state', + 10501 => 'The Tokenizer has entered an invalid state', - 10701 => 'Form element expected, found %s', - 10702 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s']; + 10601 => 'Form element expected, found %s', + 10602 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s']; public function __construct(int $code, ...$args) { if (!isset(static::$messages[$code])) { diff --git a/lib/OpenElementsStack.php b/lib/OpenElementsStack.php new file mode 100644 index 0000000..ebfcb00 --- /dev/null +++ b/lib/OpenElementsStack.php @@ -0,0 +1,104 @@ +fragmentCase = $fragmentCase; + $this->fragmentContext = $fragmentContext; + } + + public function search(mixed $needle): int { + if (!$needle) { + return -1; + } + + if ($needle instanceof DOMElement) { + foreach (array_reverse($this->_storage) as $key=>$value) { + if ($value->isSameNode($needle)) { + return $key; + } + } + } elseif (is_string($needle)) { + foreach (array_reverse($this->_storage) as $key=>$value) { + if ($value->nodeName === $needle) { + return $key; + } + } + } + + return -1; + } + + public function generateImpliedEndTags() { + $currentNodeName = end($this->_storage)->nodeName; + while ($currentNodeName === 'caption' || $currentNodeName === 'colgroup' || $currentNodeName === 'dd' || $currentNodeName === 'dt' || $currentNodeName === 'li' || $currentNodeName === 'optgroup' || $currentNodeName === 'option' || $currentNodeName === 'p' || $currentNodeName === 'rb' || $currentNodeName === 'rp' || $currentNodeName === 'rt' || $currentNodeName === 'rtc' || $currentNodeName === 'tbody' || $currentNodeName === 'td' || $currentNodeName === 'tfoot' || $currentNodeName === 'th' || $currentNodeName === 'thead' || $currentNodeName === 'tr') { + $this->pop(); + $currentNodeName = end($this->_storage)->nodeName; + } + } + + public function __get($property) { + $value = parent::__get($property); + if (!is_null($value)) { + return $value; + } + + switch ($property) { + case 'adjustedCurrentNode': + # The adjusted current node is the context element if the parser was created by + # the HTML fragment parsing algorithm and the stack of open elements has only one + # element in it (fragment case); otherwise, the adjusted current node is the + # current node. + return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode; + break; + case 'adjustedCurrentNodeName': + $adjustedCurrentNode = $this->adjustedCurrentNode; + return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null; + break; + case 'adjustedCurrentNodeNamespace': + $adjustedCurrentNode = $this->adjustedCurrentNode; + return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null; + break; + case 'currentNode': + $currentNode = end($this->_storage); + return ($currentNode) ? $currentNode : null; + break; + case 'currentNodeName': + $currentNode = $this->currentNode; + return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null; + break; + case 'currentNodeNamespace': + $currentNode = $this->currentNode; + return (!is_null($currentNode)) ? $currentNode->namespaceURI: null; + break; + default: return null; + } + } + + // Used when listing expected elements when returning parse errors + public function __toString(): string { + if (count($this->_storage) > 1) { + // Don't output the name of the root element. + for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) { + $temp[] = $this->_storage[$i]->nodeName; + } + + return implode(', ', array_unique($temp)); + } else { + return ''; + } + } +} diff --git a/lib/Parser.php b/lib/Parser.php index 99b0d5d..1213d5d 100644 --- a/lib/Parser.php +++ b/lib/Parser.php @@ -77,7 +77,7 @@ class Parser { setlocale(LC_CTYPE, 'en_US.UTF8'); // Initialize the stack of open elements. - static::$instance->stack = new Stack(static::$instance->fragmentCase, static::$instance->fragmentContext); + static::$instance->stack = new OpenElementsStack(static::$instance->fragmentCase, static::$instance->fragmentContext); // Initialize the tokenizer. static::$instance->tokenizer = new Tokenizer(static::$instance->data, static::$instance->stack); // Initialize the tree builder. diff --git a/lib/Stack.php b/lib/Stack.php index 9dc6cc0..2c8c598 100644 --- a/lib/Stack.php +++ b/lib/Stack.php @@ -2,25 +2,11 @@ declare(strict_types=1); namespace dW\HTML5; -class Stack implements \ArrayAccess { +abstract class Stack implements \ArrayAccess { protected $_storage = []; protected $fragmentCase; protected $fragmentContext; - public function __construct(bool $fragmentCase = false, $fragmentContext = null) { - // If the fragment context is not null and is not a document fragment, document, - // or element then we have a problem. Additionally, if the parser is created for - // parsing a fragment and the fragment context is null then we have a problem, - // too. - if ((!is_null($fragmentContext) && !$fragmentContext instanceof DOMDocumentFragment && !$fragmentContext instanceof DOMDocument && !$fragmentContext instanceof DOMElement) || - (is_null($fragmentContext) && $fragmentCase)) { - throw new Exception(Exception::STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED, gettype($fragmentContext)); - } - - $this->fragmentCase = $fragmentCase; - $this->fragmentContext = $fragmentContext; - } - public function offsetSet($offset, $value) { if ($offset < 0) { throw new Exception(Exception::STACK_INVALID_INDEX); @@ -38,15 +24,17 @@ class Stack implements \ArrayAccess { } public function offsetUnset($offset) { - if ($offset < 0 || $offset > count($this->$storage) - 1) { + if ($offset < 0 || $offset > count($this->_storage) - 1) { throw new Exception(Exception::STACK_INVALID_INDEX); } unset($this->_storage[$offset]); + // Reindex the array. + $this->_storage = array_values($this->_storage); } public function offsetGet($offset) { - if ($offset < 0 || $offset > count($this->$storage) - 1) { + if ($offset < 0 || $offset > count($this->_storage) - 1) { throw new Exception(Exception::STACK_INVALID_INDEX); } @@ -57,82 +45,11 @@ class Stack implements \ArrayAccess { return array_pop($this->_storage); } - public function search(mixed $needle): int { - if (!$needle) { - return -1; - } - - if ($needle instanceof DOMElement) { - foreach (array_reverse($this->_storage) as $key=>$value) { - if ($value->isSameNode($needle)) { - return $key; - } - } - } elseif (is_string($needle)) { - foreach (array_reverse($this->_storage) as $key=>$value) { - if ($value->nodeName === $needle) { - return $key; - } - } - } - - return -1; - } - - public function generateImpliedEndTags() { - $currentNodeName = end($this->_storage)->nodeName; - while ($currentNodeName === 'caption' || $currentNodeName === 'colgroup' || $currentNodeName === 'dd' || $currentNodeName === 'dt' || $currentNodeName === 'li' || $currentNodeName === 'optgroup' || $currentNodeName === 'option' || $currentNodeName === 'p' || $currentNodeName === 'rb' || $currentNodeName === 'rp' || $currentNodeName === 'rt' || $currentNodeName === 'rtc' || $currentNodeName === 'tbody' || $currentNodeName === 'td' || $currentNodeName === 'tfoot' || $currentNodeName === 'th' || $currentNodeName === 'thead' || $currentNodeName === 'tr') { - $this->pop(); - $currentNodeName = end($this->_storage)->nodeName; - } - } - public function __get($property) { switch ($property) { - case 'adjustedCurrentNode': - # The adjusted current node is the context element if the parser was created by - # the HTML fragment parsing algorithm and the stack of open elements has only one - # element in it (fragment case); otherwise, the adjusted current node is the - # current node. - return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode; - break; - case 'adjustedCurrentNodeName': - $adjustedCurrentNode = $this->adjustedCurrentNode; - return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null; - break; - case 'adjustedCurrentNodeNamespace': - $adjustedCurrentNode = $this->adjustedCurrentNode; - return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null; - break; - case 'currentNode': - $currentNode = end($this->_storage); - return ($currentNode) ? $currentNode : null; - break; - case 'currentNodeName': - $currentNode = $this->currentNode; - return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null; - break; - case 'currentNodeNamespace': - $currentNode = $this->currentNode; - return (!is_null($currentNode)) ? $currentNode->namespaceURI: null; - break; case 'length': return count($this->_storage); break; default: return null; } } - - // Used when listing expected elements when returning parse errors - public function __toString(): string { - if (count($this->_storage) > 1) { - // Don't output the name of the root element. - for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) { - $temp[] = $this->_storage[$i]->nodeName; - } - - return implode(', ', array_unique($temp)); - } else { - return ''; - } - } } diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index 80d3544..0ae732d 100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -78,7 +78,7 @@ class Tokenizer { const CTYPE_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'; const CTYPE_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - public function __construct(DataStream $data, Stack $stack) { + public function __construct(DataStream $data, OpenElementsStack $stack) { $this->state = self::DATA_STATE; $this->data = $data; $this->stack = $stack; diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php index bd7e205..76ded6c 100644 --- a/lib/TreeBuilder.php +++ b/lib/TreeBuilder.php @@ -73,7 +73,7 @@ class TreeBuilder { const QUIRKS_MODE_LIMITED = 2; - public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, Stack $stack, Tokenizer $tokenizer) { + public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Tokenizer $tokenizer) { // If the form element isn't an instance of DOMElement that has a node name of // "form" or null then there's a problem. if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) {