diff --git a/lib/DOM/Element.php b/lib/DOM/Element.php index 84ad7bd..4e3a0a0 100644 --- a/lib/DOM/Element.php +++ b/lib/DOM/Element.php @@ -11,6 +11,22 @@ class Element extends \DOMElement { protected $_classList; + public function appendChild($node) { + // If appending a class attribute node and classList has been invoked, set + // the class using classList instead of appending the attribute node, and + // return the resulting class attribute node. TokenList appends an attribute node + // internally to set the class attribute, so to prevent an infinite call loop + // from occurring, the node's whitespace-normalized value is compared with classList's + // serialized value first. The spec is vague on how this is supposed to + // be handled. + if ($node instanceof \DOMAttr && $this->_classList !== null && $node->namespaceURI === null && $node->name === 'class' && preg_replace(Data::WHITESPACE_REGEX, ' ', $node->value) !== $this->_classList->value) { + $this->_classList->value = $node->value; + return $this->getAttributeNode('class'); + } + + return parent::appendChild($node); + } + public function getAttribute($name) { // Newer versions of the DOM spec have getAttribute return an empty string only // when the attribute exists and is empty, otherwise null. This fixes that. @@ -35,6 +51,8 @@ class Element extends \DOMElement { public function setAttribute($name, $value) { try { + // If setting a class attribute and classList has been invoked, use classList to + // set it. if ($this->_classList !== null && $name === 'class') { $this->_classList->value = $value; } else { @@ -55,7 +73,9 @@ class Element extends \DOMElement { public function setAttributeNS($namespaceURI, $qualifiedName, $value) { try { - if ($namespaceURI === null && $this->_classList !== null && $qualifiedName === 'class') { + // If setting a class attribute and classList has been invoked, use classList to + // set it. 
+ if ($this->_classList !== null && $namespaceURI === null && $qualifiedName === 'class') { $this->_classList->value = $value; } else { parent::setAttributeNS($namespaceURI, $qualifiedName, $value); diff --git a/lib/DOM/TokenList.php b/lib/DOM/TokenList.php index 1a333f7..56f826c 100644 --- a/lib/DOM/TokenList.php +++ b/lib/DOM/TokenList.php @@ -47,7 +47,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # 2. If token contains any ASCII whitespace, then throw an # "InvalidCharacterError" DOMException. - if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) { + if (preg_match(Data::WHITESPACE_REGEX, $token)) { throw new DOMException(DOMException::INVALID_CHARACTER); } } @@ -119,7 +119,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # 2. If token contains any ASCII whitespace, then throw an # "InvalidCharacterError" DOMException. - if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) { + if (preg_match(Data::WHITESPACE_REGEX, $token)) { throw new DOMException(DOMException::INVALID_CHARACTER); } } @@ -151,7 +151,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # 2. If either token or newToken contains any ASCII whitespace, then throw an # "InvalidCharacterError" DOMException. - if (preg_match(self::ASCII_WHITESPACE_REGEX, $token) || preg_match(self::ASCII_WHITESPACE_REGEX, $newToken)) { + if (preg_match(Data::WHITESPACE_REGEX, $token) || preg_match(Data::WHITESPACE_REGEX, $newToken)) { throw new DOMException(DOMException::INVALID_CHARACTER); } @@ -201,7 +201,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # 2. If token contains any ASCII whitespace, then throw an # "InvalidCharacterError" DOMException. - if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) { + if (preg_match(Data::WHITESPACE_REGEX, $token)) { throw new DOMException(DOMException::INVALID_CHARACTER); } @@ -264,7 +264,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # # 1. 
Let inputTokens be the result of splitting input on ASCII whitespace. // There isn't a Set in php, so make sure all the tokens are unique. - $inputTokens = array_unique(preg_split(self::ASCII_WHITESPACE_REGEX, $input)); + $inputTokens = array_unique(preg_split(Data::WHITESPACE_REGEX, $input)); # 2. Let tokens be a new ordered set. # 3. For each token in inputTokens, append token to tokens. @@ -274,11 +274,18 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { } protected function update() { - // Create the attribute using createAttribute because setAttribute has been - // extended to use TokenList when necessary. + # A DOMTokenList object’s update steps are: + # + # 1. If the associated element does not have an associated attribute and token + # set is empty, then return. + // Step 1 is deliberately skipped (its purpose is unclear here): this class is constructed with a provided + // associated element and attribute, so the attribute is always written below. + + # 2. Set an attribute value for the associated element using associated + # attribute’s local name and the result of running the ordered set serializer + # for token set. $element = $this->element->get(); - $doc = $element->ownerDocument; - $class = $doc->createAttribute($this->localName); + $class = $element->ownerDocument->createAttribute($this->localName); $class->value = $this->__toString(); $element->appendChild($class); } diff --git a/lib/Data.php b/lib/Data.php index 7446c28..b6ed3f1 100644 --- a/lib/Data.php +++ b/lib/Data.php @@ -42,6 +42,7 @@ class Data { const DIGIT = '0123456789'; const HEX = '0123456789ABCDEFabcdef'; const WHITESPACE = "\t\n\x0C\x0D "; + const WHITESPACE_REGEX = '/[\t\n\x0c\x0D ]+/'; const WHITESPACE_SAFE = "\t\x0C "; @@ -56,7 +57,7 @@ class Data { # User agents must use the following algorithm, called the encoding # sniffing algorithm, to determine the character encoding to use # when decoding a document in the first pass. 
This algorithm takes - # as input any out-of-band metadata available to the user agent + # as input any out-of-band metadata available to the user agent # (e.g. the Content-Type metadata of the document) and all the bytes # available so far, and returns a character encoding and a confidence # that is either tentative or certain. @@ -93,9 +94,9 @@ class Data { public function consume(): string { $char = $this->data->nextChar(); - # Before the tokenization stage, the input stream must be + # Before the tokenization stage, the input stream must be # preprocessed by normalizing newlines. - # Thus, newlines in HTML DOMs are represented by U+000A LF characters, + # Thus, newlines in HTML DOMs are represented by U+000A LF characters, # and there are never any U+000D CR characters in the input to the tokenization stage. if ($char === "\r") { // if this is a CR+LF pair, skip the CR and note the normalization @@ -103,7 +104,7 @@ class Data { $char = $this->data->nextChar(); $this->normalized[$this->data->posChar()] = true; } - // otherwise just silently change the character to LF; + // otherwise just silently change the character to LF; // the bare CR will be trivial to process when seeking backwards else { $char = "\n"; @@ -119,7 +120,7 @@ class Data { $this->eof = true; } else { $this->_column++; - $len = strlen($char); + $len = strlen($char); $here = $this->data->posChar(); if ($this->lastError < $here) { // look for erroneous characters @@ -246,12 +247,12 @@ class Data { $line = $this->_line; $col = $this->_column; do { - // If the current position is the start of a line, + // If the current position is the start of a line, // get the column position of the end of the previous line if (isset($this->newlines[$pos])) { $line--; $col = $this->newlines[$pos]; - // If the newline was a normalized CR+LF pair, + // If the newline was a normalized CR+LF pair, // go back one extra character if (isset($this->normalized[$pos])) { $pos--;