From 37205029a3bec52de27e4bd1f63ee888387505fc Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Sat, 4 Aug 2018 18:01:15 -0500 Subject: [PATCH] Decoupled Stack from Parser --- lib/ActiveFormattingElementsList.php | 9 +++++++-- lib/DataStream.php | 2 +- lib/Parser.php | 6 ++++-- lib/Stack.php | 9 ++++++++- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lib/ActiveFormattingElementsList.php b/lib/ActiveFormattingElementsList.php index 13dec23..54c5e3a 100644 --- a/lib/ActiveFormattingElementsList.php +++ b/lib/ActiveFormattingElementsList.php @@ -16,6 +16,11 @@ namespace dW\HTML5; # can be created for that token if necessary. class ActiveFormattingElementsList implements \ArrayAccess { protected $_storage = []; + protected $stack; + + public function __construct(Stack $stack) { + $this->stack = $stack; + } public function offsetSet($offset, $value) { if ($offset < 0) { @@ -132,7 +137,7 @@ class ActiveFormattingElementsList implements \ArrayAccess { # elements is a marker, or if it is an element that is in the stack of open # elements, then there is nothing to reconstruct; stop this algorithm. $entry = end($this->_storage); - if ($entry instanceof ActiveFormattingElementMarker || in_array($entry['element'], Parser::$instance->stack)) { + if ($entry instanceof ActiveFormattingElementMarker || in_array($entry['element'], $this->stack)) { return; } @@ -153,7 +158,7 @@ class ActiveFormattingElementsList implements \ArrayAccess { # 6. If entry is neither a marker nor an element that is also in the stack of # open elements, go to the step labeled Rewind. - if (!$entry instanceof ActiveFormattingElementMarker && !in_array($entry['element'], Parser::$instance->stack)) { + if (!$entry instanceof ActiveFormattingElementMarker && !in_array($entry['element'], $this->stack)) { goto rewind; } diff --git a/lib/DataStream.php b/lib/DataStream.php index 890e59b..265025a 100644 --- a/lib/DataStream.php +++ b/lib/DataStream.php @@ -109,7 +109,7 @@ class DataStream // OPTIMIZATION: When this spec states to return a character token of any kind this // method will just return the character. The token will be emitted from - // Parser::tokenize() instead. Likewise, if the spec states to return nothing this + // Parser::parse() instead. Likewise, if the spec states to return nothing this // method will instead return '&' because every single use of "tokenizing a // character reference" in the spec this emits a '&' character token upon failure. diff --git a/lib/Parser.php b/lib/Parser.php index 8837aac..d00a0e5 100644 --- a/lib/Parser.php +++ b/lib/Parser.php @@ -98,8 +98,6 @@ class Parser { protected function __construct() { $this->insertionMode = static::INITIAL_MODE; $this->quirksMode = static::QUIRKS_MODE_OFF; - $this->stack = new Stack(); - $this->activeFormattingElementsList = new ActiveFormattingElementsList(); static::$instance = $this; } @@ -129,6 +127,10 @@ class Parser { // work on basic latin characters. Used extensively when tokenizing. setlocale(LC_CTYPE, 'en_US.UTF8'); + // Initialize the stack of open elements. + static::$instance->stack = new Stack(static::$instance->fragmentCase, static::$instance->fragmentContext); + // Initialize the list of active formatting elements. + static::$instance->activeFormattingElementsList = new ActiveFormattingElementsList(static::$instance->stack); // Initialize the tokenizer. static::$instance->tokenizer = new Tokenizer(static::$instance->data, static::$instance->stack); // Initialize the parse error handler. diff --git a/lib/Stack.php b/lib/Stack.php index b1c567d..f631290 100644 --- a/lib/Stack.php +++ b/lib/Stack.php @@ -4,6 +4,13 @@ namespace dW\HTML5; class Stack implements \ArrayAccess { protected $_storage = []; + protected $fragmentCase; + protected $fragmentContext; + + public function __construct(bool $fragmentCase = false, $fragmentContext = null) { + $this->fragmentCase = $fragmentCase; + $this->fragmentContext = $fragmentContext; + } public function offsetSet($offset, $value) { if ($offset < 0) { @@ -76,7 +83,7 @@ class Stack implements \ArrayAccess { # the HTML fragment parsing algorithm and the stack of open elements has only one # element in it (fragment case); otherwise, the adjusted current node is the # current node. - return (Parser::$instance->fragmentCase && $this->length === 1) ? Parser::$instance->fragmentContext : $this->currentNode; + return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode; break; case 'adjustedCurrentNodeNamespace': $adjustedCurrentNode = $this->adjustedCurrentNode;