Browse Source

Changed Stack to abstract class

Moved the stack of open elements to its own class OpenElementsStack and made it and ActiveFormattingElementsList inherit from an abstract Stack class.
split-manual
Dustin Wilson 6 years ago
parent
commit
c414f3dbfa
  1. 46
      lib/ActiveFormattingElementsList.php
  2. 32
      lib/Exception.php
  3. 104
      lib/OpenElementsStack.php
  4. 2
      lib/Parser.php
  5. 93
      lib/Stack.php
  6. 2
      lib/Tokenizer.php
  7. 2
      lib/TreeBuilder.php

46
lib/ActiveFormattingElementsList.php

@ -14,17 +14,17 @@ namespace dW\HTML5;
# In addition, each element in the list of active formatting elements is
# associated with the token for which it was created, so that further elements
# can be created for that token if necessary.
class ActiveFormattingElementsList implements \ArrayAccess {
class ActiveFormattingElementsList extends Stack {
protected $_storage = [];
protected $stack;
public function __construct(Stack $stack) {
public function __construct(OpenElementsStack $stack) {
$this->stack = $stack;
}
public function offsetSet($offset, $value) {
if ($offset < 0) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
if (is_null($offset)) {
@ -78,32 +78,10 @@ class ActiveFormattingElementsList implements \ArrayAccess {
# 2. Add element to the list of active formatting elements.
$this->_storage[] = $value;
} else {
$this->_storage[$offset] = $value;
parent::offsetSet($offset, $value);
}
}
public function offsetExists($offset) {
return isset($this->_storage[$offset]);
}
public function offsetUnset($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
}
unset($this->_storage[$offset]);
// Reindex the array.
$this->_storage = array_values($this->_storage);
}
public function offsetGet($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
throw new Exception(Exception::ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX);
}
return $this->_storage[$offset];
}
public function insert(StartTagToken $token, \DOMElement $element) {
$this->_storage[] = [
'token' => $token,
@ -115,17 +93,14 @@ class ActiveFormattingElementsList implements \ArrayAccess {
$this->offsetSet(null, new ActiveFormattingElementMarker());
}
public function pop() {
return array_pop($this->_storage);
}
public function reconstruct() {
# When the steps below require the UA to reconstruct the active formatting
# elements, the UA must perform the following steps:
// Yes, I know this uses gotos, but here are the reasons for using them:
// 1. The spec seems to actively encourage using them, even providing
// suggestions on what to name the labels.
// 2. It'd be a pain to program and maintain without them because of this.
// 2. It'd be a pain to program and maintain without them because the algorithm
// jumps around all over the place.
# 1. If there are no entries in the list of active formatting elements, then
# there is nothing to reconstruct; stop this algorithm.
@ -208,6 +183,11 @@ class ActiveFormattingElementsList implements \ArrayAccess {
}
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {
return $value;
}
switch ($property) {
case 'lastMarker':
for ($end = count($this->_storage) - 1, $i = $end; $i >= 0; $i--) {
@ -218,8 +198,6 @@ class ActiveFormattingElementsList implements \ArrayAccess {
return false;
break;
case 'length': return count($this->_storage);
break;
default: return null;
}
}

32
lib/Exception.php

@ -15,18 +15,16 @@ class Exception extends \Exception {
const STACK_DOMNODE_ONLY = 10202;
const STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10203;
const ACTIVE_FORMATTING_ELEMENT_LIST_INVALID_INDEX = 10301;
const DATASTREAM_NODATA = 10301;
const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10302;
const DATASTREAM_NODATA = 10401;
const DATASTREAM_INVALID_DATA_CONSUMPTION_LENGTH = 10402;
const DOM_DOMDOCUMENT_EXPECTED = 10401;
const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10402;
const DOM_DOMDOCUMENT_EXPECTED = 10501;
const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10502;
const TOKENIZER_INVALID_STATE = 10501;
const TOKENIZER_INVALID_STATE = 10601;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10701;
const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10702;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10601;
const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10602;
protected static $messages = [10000 => 'Invalid error code',
10001 => 'Unknown error; escaping',
@ -40,18 +38,16 @@ class Exception extends \Exception {
10202 => 'Instances of DOMNode are the only types allowed in a Stack',
10203 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected for fragment context; found %s',
10301 => '%s is an invalid ActiveFormattingElementsList index',
10401 => 'Data string expected; found %s',
10402 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10301 => 'Data string expected; found %s',
10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10501 => 'The first argument must be an instance of \DOMElement or null; found %s',
10502 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s',
10401 => 'The first argument must be an instance of \DOMElement or null; found %s',
10402 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s',
10601 => 'The Tokenizer has entered an invalid state',
10501 => 'The Tokenizer has entered an invalid state',
10701 => 'Form element expected, found %s',
10702 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s'];
10601 => 'Form element expected, found %s',
10602 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s'];
public function __construct(int $code, ...$args) {
if (!isset(static::$messages[$code])) {

104
lib/OpenElementsStack.php

@ -0,0 +1,104 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
/**
 * The parser's stack of open elements.
 *
 * Elements are kept in the inherited $_storage array, with the most recently
 * pushed element (the "current node") at the end. On top of the generic Stack
 * behaviour this class adds searching, implied end tag generation and a set of
 * read-only virtual properties exposed through __get().
 *
 * Virtual properties: adjustedCurrentNode, adjustedCurrentNodeName,
 * adjustedCurrentNodeNamespace, currentNode, currentNodeName,
 * currentNodeNamespace, plus whatever the parent class' __get() provides.
 */
class OpenElementsStack extends Stack {
    // Node names popped by generateImpliedEndTags().
    // NOTE(review): this list includes table-related names (caption, colgroup,
    // tbody, td, tfoot, th, thead, tr), which looks like the specification's
    // "generate all implied end tags thoroughly" list rather than the plain
    // "generate implied end tags" one -- confirm against the spec. Kept as-is
    // to preserve existing behaviour.
    private const IMPLIED_END_TAG_NAMES = [
        'caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb',
        'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
    ];

    protected $fragmentCase;
    protected $fragmentContext;

    /**
     * @param bool $fragmentCase Whether the parser was created for parsing a fragment
     * @param \DOMDocumentFragment|\DOMDocument|\DOMElement|null $fragmentContext The fragment's context node
     *
     * @throws Exception When the context is of an unsupported type, or when it
     *                   is null although $fragmentCase is true
     */
    public function __construct(bool $fragmentCase = false, $fragmentContext = null) {
        // The DOM classes live in the global namespace and must be fully
        // qualified here; unqualified names would resolve to non-existent
        // classes in this namespace, making every instanceof check false.
        $isSupportedContext = $fragmentContext instanceof \DOMDocumentFragment
            || $fragmentContext instanceof \DOMDocument
            || $fragmentContext instanceof \DOMElement;

        // If the fragment context is not null and is not a document fragment,
        // document, or element then we have a problem. Additionally, if the
        // parser is created for parsing a fragment and the fragment context is
        // null then we have a problem, too.
        if ((!is_null($fragmentContext) && !$isSupportedContext) || (is_null($fragmentContext) && $fragmentCase)) {
            throw new Exception(Exception::STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED, gettype($fragmentContext));
        }

        $this->fragmentCase = $fragmentCase;
        $this->fragmentContext = $fragmentContext;
    }

    /**
     * Finds the entry closest to the top of the stack that matches the needle.
     *
     * A \DOMElement needle is matched by node identity, a string needle by
     * node name. Any other (or falsy) needle matches nothing.
     *
     * Note: the "mixed" parameter type requires PHP 8.0 or newer.
     *
     * @param \DOMElement|string|null $needle
     *
     * @return int The index of the match within the stack, or -1 when there is none
     */
    public function search(mixed $needle): int {
        if (!$needle) {
            return -1;
        }

        $matchByIdentity = $needle instanceof \DOMElement;
        if (!$matchByIdentity && !is_string($needle)) {
            return -1;
        }

        // Walk from the top of the stack down. Keys are preserved so that the
        // value returned is the real stack index, not the position within the
        // reversed copy.
        foreach (array_reverse($this->_storage, true) as $index => $element) {
            $matches = $matchByIdentity ? $element->isSameNode($needle) : $element->nodeName === $needle;
            if ($matches) {
                return $index;
            }
        }

        return -1;
    }

    /**
     * Pops elements off the stack for as long as the current node's name is
     * one of IMPLIED_END_TAG_NAMES. Stops quietly once the stack is empty.
     */
    public function generateImpliedEndTags() {
        // end() returns false on an empty array, which ends the loop instead of
        // attempting to read a property of a non-object.
        while (($currentNode = end($this->_storage)) !== false
            && in_array($currentNode->nodeName, self::IMPLIED_END_TAG_NAMES, true)) {
            $this->pop();
        }
    }

    /**
     * Resolves the read-only virtual properties listed in the class docblock.
     *
     * The parent implementation is consulted first; its answer is used
     * whenever it is not null.
     *
     * @return mixed The property value, or null for unknown properties
     */
    public function __get($property) {
        $value = parent::__get($property);
        if (!is_null($value)) {
            return $value;
        }

        switch ($property) {
            case 'adjustedCurrentNode':
                # The adjusted current node is the context element if the parser was created by
                # the HTML fragment parsing algorithm and the stack of open elements has only one
                # element in it (fragment case); otherwise, the adjusted current node is the
                # current node.
                return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode;
            case 'adjustedCurrentNodeName':
                $adjustedCurrentNode = $this->adjustedCurrentNode;
                return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null;
            case 'adjustedCurrentNodeNamespace':
                $adjustedCurrentNode = $this->adjustedCurrentNode;
                return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI : null;
            case 'currentNode':
                // end() yields false on an empty stack; report that as null.
                $currentNode = end($this->_storage);
                return ($currentNode) ? $currentNode : null;
            case 'currentNodeName':
                $currentNode = $this->currentNode;
                return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null;
            case 'currentNodeNamespace':
                $currentNode = $this->currentNode;
                return (!is_null($currentNode)) ? $currentNode->namespaceURI : null;
            default:
                return null;
        }
    }

    /**
     * Used when listing expected elements when returning parse errors.
     *
     * @return string The distinct node names of every element above the root,
     *                current node included, joined by ", "; an empty string
     *                when the stack holds at most the root element
     */
    public function __toString(): string {
        $names = [];
        // Don't output the name of the root element.
        foreach (array_slice($this->_storage, 1) as $element) {
            $names[] = $element->nodeName;
        }

        return implode(', ', array_unique($names));
    }
}

2
lib/Parser.php

@ -77,7 +77,7 @@ class Parser {
setlocale(LC_CTYPE, 'en_US.UTF8');
// Initialize the stack of open elements.
static::$instance->stack = new Stack(static::$instance->fragmentCase, static::$instance->fragmentContext);
static::$instance->stack = new OpenElementsStack(static::$instance->fragmentCase, static::$instance->fragmentContext);
// Initialize the tokenizer.
static::$instance->tokenizer = new Tokenizer(static::$instance->data, static::$instance->stack);
// Initialize the tree builder.

93
lib/Stack.php

@ -2,25 +2,11 @@
declare(strict_types=1);
namespace dW\HTML5;
class Stack implements \ArrayAccess {
abstract class Stack implements \ArrayAccess {
protected $_storage = [];
protected $fragmentCase;
protected $fragmentContext;
public function __construct(bool $fragmentCase = false, $fragmentContext = null) {
// If the fragment context is not null and is not a document fragment, document,
// or element then we have a problem. Additionally, if the parser is created for
// parsing a fragment and the fragment context is null then we have a problem,
// too.
if ((!is_null($fragmentContext) && !$fragmentContext instanceof DOMDocumentFragment && !$fragmentContext instanceof DOMDocument && !$fragmentContext instanceof DOMElement) ||
(is_null($fragmentContext) && $fragmentCase)) {
throw new Exception(Exception::STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED, gettype($fragmentContext));
}
$this->fragmentCase = $fragmentCase;
$this->fragmentContext = $fragmentContext;
}
public function offsetSet($offset, $value) {
if ($offset < 0) {
throw new Exception(Exception::STACK_INVALID_INDEX);
@ -38,15 +24,17 @@ class Stack implements \ArrayAccess {
}
public function offsetUnset($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
unset($this->_storage[$offset]);
// Reindex the array.
$this->_storage = array_values($this->_storage);
}
public function offsetGet($offset) {
if ($offset < 0 || $offset > count($this->$storage) - 1) {
if ($offset < 0 || $offset > count($this->_storage) - 1) {
throw new Exception(Exception::STACK_INVALID_INDEX);
}
@ -57,82 +45,11 @@ class Stack implements \ArrayAccess {
return array_pop($this->_storage);
}
public function search(mixed $needle): int {
if (!$needle) {
return -1;
}
if ($needle instanceof DOMElement) {
foreach (array_reverse($this->_storage) as $key=>$value) {
if ($value->isSameNode($needle)) {
return $key;
}
}
} elseif (is_string($needle)) {
foreach (array_reverse($this->_storage) as $key=>$value) {
if ($value->nodeName === $needle) {
return $key;
}
}
}
return -1;
}
public function generateImpliedEndTags() {
$currentNodeName = end($this->_storage)->nodeName;
while ($currentNodeName === 'caption' || $currentNodeName === 'colgroup' || $currentNodeName === 'dd' || $currentNodeName === 'dt' || $currentNodeName === 'li' || $currentNodeName === 'optgroup' || $currentNodeName === 'option' || $currentNodeName === 'p' || $currentNodeName === 'rb' || $currentNodeName === 'rp' || $currentNodeName === 'rt' || $currentNodeName === 'rtc' || $currentNodeName === 'tbody' || $currentNodeName === 'td' || $currentNodeName === 'tfoot' || $currentNodeName === 'th' || $currentNodeName === 'thead' || $currentNodeName === 'tr') {
$this->pop();
$currentNodeName = end($this->_storage)->nodeName;
}
}
public function __get($property) {
switch ($property) {
case 'adjustedCurrentNode':
# The adjusted current node is the context element if the parser was created by
# the HTML fragment parsing algorithm and the stack of open elements has only one
# element in it (fragment case); otherwise, the adjusted current node is the
# current node.
return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode;
break;
case 'adjustedCurrentNodeName':
$adjustedCurrentNode = $this->adjustedCurrentNode;
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null;
break;
case 'adjustedCurrentNodeNamespace':
$adjustedCurrentNode = $this->adjustedCurrentNode;
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null;
break;
case 'currentNode':
$currentNode = end($this->_storage);
return ($currentNode) ? $currentNode : null;
break;
case 'currentNodeName':
$currentNode = $this->currentNode;
return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null;
break;
case 'currentNodeNamespace':
$currentNode = $this->currentNode;
return (!is_null($currentNode)) ? $currentNode->namespaceURI: null;
break;
case 'length': return count($this->_storage);
break;
default: return null;
}
}
// Used when listing expected elements when returning parse errors
public function __toString(): string {
if (count($this->_storage) > 1) {
// Don't output the name of the root element.
for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) {
$temp[] = $this->_storage[$i]->nodeName;
}
return implode(', ', array_unique($temp));
} else {
return '';
}
}
}

2
lib/Tokenizer.php

@ -78,7 +78,7 @@ class Tokenizer {
const CTYPE_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
const CTYPE_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
public function __construct(DataStream $data, Stack $stack) {
public function __construct(DataStream $data, OpenElementsStack $stack) {
$this->state = self::DATA_STATE;
$this->data = $data;
$this->stack = $stack;

2
lib/TreeBuilder.php

@ -73,7 +73,7 @@ class TreeBuilder {
const QUIRKS_MODE_LIMITED = 2;
public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, Stack $stack, Tokenizer $tokenizer) {
public function __construct(DOM $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Tokenizer $tokenizer) {
// If the form element isn't an instance of DOMElement that has a node name of
// "form" or null then there's a problem.
if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) {

Loading…
Cancel
Save