Browse Source

Changed Stack to abstract class

Moved the stack of open elements to its own class OpenElementsStack and made it and ActiveFormattingElementsList inherit from an abstract Stack class.
ns
Dustin Wilson 6 years ago
parent
commit
c414f3dbfa
  1. 46
      lib/ActiveFormattingElementsList.php
  2. 32
      lib/Exception.php
  3. 104
      lib/OpenElementsStack.php
  4. 2
      lib/Parser.php
  5. 93
      lib/Stack.php
  6. 2
      lib/Tokenizer.php
  7. 2
      lib/TreeBuilder.php

46
lib/ActiveFormattingElementsList.php

@ -14,17 +14,17 @@ namespace dW\HTML5;
# In addition, each element in the list of active formatting elements is
# associated with the token for which it was created, so that further elements
# can be created for that token if necessary.
class ActiveFormattingElementsList implements \ArrayAccess {
class ActiveFormattingElementsList extends Stack {
protected $_storage = [];
protected $stack;
public function __construct(Stack $stack) {
public function __construct(OpenElementsStack $stack) {
$this->stack = $stack;
}
public function offsetSet($offset, $value) {
if ($offset < 0) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
if (is_null($offset)) {
@ -78,32 +78,10 @@ class ActiveFormattingElementsList implements \ArrayAccess {
# 2. Add element to the list of active formatting elements.
$this->_storage[] = $value;
} else {
$this->_storage[$offset] = $value;
parent::offsetSet($offset, $value);
}
}
public function offsetExists($offset) {
return isset($this->_storage[$offset]);
}
public function offsetUnset($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
}
unset($this->_storage[$offset]);
// Reindex the array.
$this->_storage = array_values($this->_storage);
}
public function offsetGet($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
}
return $this->_storage[$offset];
}
public function insert(StartTagToken $token, \DOMElement $element) {
$this->_storage[] = [
'token' => $token,
@ -115,17 +93,14 @@ class ActiveFormattingElementsList implements \ArrayAccess {
$this->offsetSet(null, new ActiveFormattingElementMarker());
}
public function pop() {
return array_pop($this->_storage);
}
public function reconstruct() {
# When the steps below require the UA to reconstruct the active formatting
# elements, the UA must perform the following steps:
// Yes, I know this uses gotos, but here are the reasons for using them:
// 1. The spec seems to actively encourage using them, even providing
// suggestions on what to name the labels.
// 2. It'd be a pain to program and maintain without them because of this.
// 2. It'd be a pain to program and maintain without them because the algorithm
// jumps around all over the place.
# 1. If there are no entries in the list of active formatting elements, then
# there is nothing to reconstruct; stop this algorithm.
@ -208,6 +183,11 @@ class ActiveFormattingElementsList implements \ArrayAccess {
}
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {
return $value;
}
switch ($property) {
case 'lastMarker':
for ($end = count($this->_storage) - 1, $i = $end; $i >= 0; $i--) {
@ -218,8 +198,6 @@ class ActiveFormattingElementsList implements \ArrayAccess {
return false;
break;
case 'length': return count($this->_storage);
break;
default: return null;
}
}

32
lib/Exception.php

@ -15,18 +15,16 @@ class Exception extends \Exception {
const STACK_DOMNODE_ONLY = 10202;
const STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10203;
const ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX = 10301;
const DATASTREAM_NODATA = 10301;
const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10302;
const DATASTREAM_NODATA = 10401;
const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10402;
const DOM_DOMDOCUMENT_EXPECTED = 10401;
const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10402;
const DOM_DOMDOCUMENT_EXPECTED = 10501;
const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10502;
const TOKENIZER_INVALID_STATE = 10501;
const TOKENIZER_INVALID_STATE = 10601;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10701;
const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10702;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10601;
const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10602;
protected static $messages = [10000 => 'Invalid error code',
10001 => 'Unknown error; escaping',
@ -40,18 +38,16 @@ class Exception extends \Exception {
10202 => 'Instances of DOMNode are the only types allowed in a Stack',
10203 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected for fragment context; found %s',
10301 => '%s is an invalid ActiveFormattingElementsList index',
10401 => 'Data string expected; found %s',
10402 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10301 => 'Data string expected; found %s',
10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10501 => 'The first argument must be an instance of \DOMElement or null; found %s',
10502 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s',
10401 => 'The first argument must be an instance of \DOMElement or null; found %s',
10402 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s',
10601 => 'The Tokenizer has entered an invalid state',
10501 => 'The Tokenizer has entered an invalid state',
10701 => 'Form element expected, found %s',
10702 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s'];
10601 => 'Form element expected, found %s',
10602 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s'];
public function __construct(int $code, ...$args) {
if (!isset(static::$messages[$code])) {

104
lib/OpenElementsStack.php

@ -0,0 +1,104 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
/**
 * The stack of open elements used while building the tree.
 *
 * Elements are kept in the inherited $_storage array; the last entry is the
 * current node. Besides the array access and pop() behaviour inherited from
 * Stack, this class exposes the following read-only magic properties through
 * __get(): adjustedCurrentNode, adjustedCurrentNodeName,
 * adjustedCurrentNodeNamespace, currentNode, currentNodeName and
 * currentNodeNamespace.
 */
class OpenElementsStack extends Stack {
    /** @var bool Whether the stack was created for fragment parsing. */
    protected $fragmentCase;
    /** @var \DOMDocumentFragment|\DOMDocument|\DOMElement|null Context node supplied for fragment parsing. */
    protected $fragmentContext;

    /**
     * @param bool $fragmentCase True when the stack is created for parsing a fragment.
     * @param \DOMDocumentFragment|\DOMDocument|\DOMElement|null $fragmentContext
     *        The fragment context; must not be null when $fragmentCase is true.
     *
     * @throws Exception STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED
     *         when the context is neither null nor one of the accepted DOM
     *         classes, or when it is null while $fragmentCase is true.
     */
    public function __construct(bool $fragmentCase = false, $fragmentContext = null) {
        // The DOM classes live in the global namespace, so they must be fully
        // qualified here; unqualified names would resolve inside dW\HTML5 and
        // never match, rejecting every non-null context.
        $isAcceptedNode = $fragmentContext instanceof \DOMDocumentFragment
            || $fragmentContext instanceof \DOMDocument
            || $fragmentContext instanceof \DOMElement;

        // If the fragment context is not null and is not a document fragment, document,
        // or element then we have a problem. Additionally, if the parser is created for
        // parsing a fragment and the fragment context is null then we have a problem,
        // too.
        if ((!is_null($fragmentContext) && !$isAcceptedNode) || (is_null($fragmentContext) && $fragmentCase)) {
            throw new Exception(Exception::STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED, gettype($fragmentContext));
        }

        $this->fragmentCase = $fragmentCase;
        $this->fragmentContext = $fragmentContext;
    }

    /**
     * Searches the stack from the most recently pushed element downwards.
     *
     * @param mixed $needle Either a \DOMElement (matched with isSameNode()) or
     *        a string (matched against each entry's nodeName). Any falsy or
     *        other kind of value yields -1.
     *
     * @return int Position of the first match counted from the top of the
     *         stack (0 is the current node), or -1 when nothing matches.
     */
    public function search(mixed $needle): int {
        if (!$needle) {
            return -1;
        }

        // NOTE(review): array_reverse() re-indexes numeric keys, so the value
        // returned is an offset from the top of the stack rather than an index
        // into $_storage — confirm this is what callers expect.
        if ($needle instanceof \DOMElement) {
            foreach (array_reverse($this->_storage) as $key => $value) {
                if ($value->isSameNode($needle)) {
                    return $key;
                }
            }
        } elseif (is_string($needle)) {
            foreach (array_reverse($this->_storage) as $key => $value) {
                if ($value->nodeName === $needle) {
                    return $key;
                }
            }
        }

        return -1;
    }

    /**
     * Pops elements off the stack for as long as the current node is one of
     * the elements whose end tag is implied. Stops quietly if the stack
     * becomes empty.
     */
    public function generateImpliedEndTags() {
        $impliedEndTagNames = [
            'caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p',
            'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
        ];

        // end() returns false on an empty stack; check for that before reading
        // nodeName so that an empty stack does not trigger a property read on
        // a non-object.
        while (($currentNode = end($this->_storage)) !== false
            && in_array($currentNode->nodeName, $impliedEndTagNames, true)) {
            $this->pop();
        }
    }

    /**
     * Resolves the read-only magic properties of the stack.
     *
     * The parent implementation is consulted first and any non-null value it
     * returns wins; otherwise the properties listed on the class are handled
     * here.
     *
     * @param string $property Name of the property being read.
     *
     * @return mixed The property value, or null for unknown properties and
     *         for node-derived properties when there is no such node.
     */
    public function __get($property) {
        $value = parent::__get($property);
        if (!is_null($value)) {
            return $value;
        }

        switch ($property) {
            case 'adjustedCurrentNode':
                # The adjusted current node is the context element if the parser was created by
                # the HTML fragment parsing algorithm and the stack of open elements has only one
                # element in it (fragment case); otherwise, the adjusted current node is the
                # current node.
                return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode;
            case 'adjustedCurrentNodeName':
                $adjustedCurrentNode = $this->adjustedCurrentNode;
                return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null;
            case 'adjustedCurrentNodeNamespace':
                $adjustedCurrentNode = $this->adjustedCurrentNode;
                return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI : null;
            case 'currentNode':
                // end() yields false for an empty stack; normalise that to null.
                $currentNode = end($this->_storage);
                return ($currentNode) ? $currentNode : null;
            case 'currentNodeName':
                $currentNode = $this->currentNode;
                return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null;
            case 'currentNodeNamespace':
                $currentNode = $this->currentNode;
                return (!is_null($currentNode)) ? $currentNode->namespaceURI : null;
            default:
                return null;
        }
    }

    /**
     * Used when listing expected elements when returning parse errors.
     *
     * @return string Comma-separated, de-duplicated node names, or an empty
     *         string when the stack holds fewer than two entries.
     */
    public function __toString(): string {
        $total = count($this->_storage);
        if ($total <= 1) {
            return '';
        }

        // Don't output the name of the root element.
        // NOTE(review): the upper bound also leaves out the last entry (the
        // current node); kept as found — confirm whether that is intended.
        $names = [];
        for ($i = 1; $i < $total - 1; $i++) {
            $names[] = $this->_storage[$i]->nodeName;
        }

        return implode(', ', array_unique($names));
    }
}

2
lib/Parser.php

@ -77,7 +77,7 @@ class Parser {
setlocale(LC_CTYPE, 'en_US.UTF8');
// Initialize the stack of open elements.
static::$instance->stack = new Stack(static::$instance->fragmentCase, static::$instance->fragmentContext);
static::$instance->stack = new OpenElementsStack(static::$instance->fragmentCase, static::$instance->fragmentContext);
// Initialize the tokenizer.
static::$instance->tokenizer = new Tokenizer(static::$instance->data, static::$instance->stack);
// Initialize the tree builder.

93
lib/Stack.php

@ -2,25 +2,11 @@
declare(strict_types=1);
namespace dW\HTML5;
class Stack implements \ArrayAccess {
abstract class Stack implements \ArrayAccess {
protected $_storage = [];
protected $fragmentCase;
protected $fragmentContext;
public function __construct(bool $fragmentCase = false, $fragmentContext = null) {
// If the fragment context is not null and is not a document fragment, document,
// or element then we have a problem. Additionally, if the parser is created for
// parsing a fragment and the fragment context is null then we have a problem,
// too.
if ((!is_null($fragmentContext) && !$fragmentContext instanceof DOMDocumentFragment && !$fragmentContext instanceof DOMDocument && !$fragmentContext instanceof DOMElement) ||
(is_null($fragmentContext) && $fragmentCase)) {
throw new Exception(Exception::STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED, gettype($fragmentContext));
}
$this->fragmentCase = $fragmentCase;
$this->fragmentContext = $fragmentContext;
}
public function offsetSet($offset, $value) {
if ($offset < 0) {
throw new Exception(Exception::STACK_INVALID_INDEX);
@ -38,15 +24,17 @@ class Stack implements \ArrayAccess {
}
public function offsetUnset($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
unset($this->_storage[$offset]);
// Reindex the array.
$this->_storage = array_values($this->_storage);
}
public function offsetGet($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
@ -57,82 +45,11 @@ class Stack implements \ArrayAccess {
return array_pop($this->_storage);
}
public function search(mixed $needle): int {
if (!$needle) {
return -1;
}
if ($needle instanceof DOMElement) {
foreach (array_reverse($this->_storage) as $key=>$value) {
if ($value->isSameNode($needle)) {
return $key;
}
}
} elseif (is_string($needle)) {
foreach (array_reverse($this->_storage) as $key=>$value) {
if ($value->nodeName === $needle) {
return $key;
}
}
}
return -1;
}
public function generateImpliedEndTags() {
$currentNodeName = end($this->_storage)->nodeName;
while ($currentNodeName === 'caption' || $currentNodeName === 'colgroup' || $currentNodeName === 'dd' || $currentNodeName === 'dt' || $currentNodeName === 'li' || $currentNodeName === 'optgroup' || $currentNodeName === 'option' || $currentNodeName === 'p' || $currentNodeName === 'rb' || $currentNodeName === 'rp' || $currentNodeName === 'rt' || $currentNodeName === 'rtc' || $currentNodeName === 'tbody' || $currentNodeName === 'td' || $currentNodeName === 'tfoot' || $currentNodeName === 'th' || $currentNodeName === 'thead' || $currentNodeName === 'tr') {
$this->pop();
$currentNodeName = end($this->_storage)->nodeName;
}
}
public function __get($property) {
switch ($property) {
case 'adjustedCurrentNode':
# The adjusted current node is the context element if the parser was created by
# the HTML fragment parsing algorithm and the stack of open elements has only one
# element in it (fragment case); otherwise, the adjusted current node is the
# current node.
return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode;
break;
case 'adjustedCurrentNodeName':
$adjustedCurrentNode = $this->adjustedCurrentNode;
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null;
break;
case 'adjustedCurrentNodeNamespace':
$adjustedCurrentNode = $this->adjustedCurrentNode;
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null;
break;
case 'currentNode':
$currentNode = end($this->_storage);
return ($currentNode) ? $currentNode : null;
break;
case 'currentNodeName':
$currentNode = $this->currentNode;
return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null;
break;
case 'currentNodeNamespace':
$currentNode = $this->currentNode;
return (!is_null($currentNode)) ? $currentNode->namespaceURI: null;
break;
case 'length': return count($this->_storage);
break;
default: return null;
}
}
// Used when listing expected elements when returning parse errors
public function __toString(): string {
if (count($this->_storage) > 1) {
// Don't output the name of the root element.
for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) {
$temp[] = $this->_storage[$i]->nodeName;
}
return implode(', ', array_unique($temp));
} else {
return '';
}
}
}

2
lib/Tokenizer.php

@ -78,7 +78,7 @@ class Tokenizer {
const CTYPE_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
const CTYPE_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
public function __construct(DataStream $data, Stack $stack) {
public function __construct(DataStream $data, OpenElementsStack $stack) {
$this->state = self::DATA_STATE;
$this->data = $data;
$this->stack = $stack;

2
lib/TreeBuilder.php

@ -73,7 +73,7 @@ class TreeBuilder {
const QUIRKS_MODE_LIMITED = 2;
public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, Stack $stack, Tokenizer $tokenizer) {
public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Tokenizer $tokenizer) {
// If the form element isn't an instance of DOMElement that has a node name of
// "form" or null then there's a problem.
if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) {

Loading…
Cancel
Save