Element::classList fixes

3 years ago · 1c20e458ea
3 changed files with 45 additions and 17 deletions
--- a/lib/DOM/Element.php
+++ b/lib/DOM/Element.php
@ -11,6 +11,22 @@ class Element extends \DOMElement {

    protected $_classList;

+    public function appendChild($node) {
+        // If classList has been invoked and a non-namespaced class attribute node is
+        // being appended, set the class through classList instead of appending the
+        // node, and return the element's resulting class attribute node. TokenList
+        // itself appends an attribute node to write the class attribute, so to avoid
+        // an infinite call loop the redirect only happens when the node's value, with
+        // whitespace runs collapsed to single spaces, differs from classList's
+        // serialized value. The spec is vague on how this is supposed to be handled.
+        if ($node instanceof \DOMAttr && $this->_classList !== null && $node->namespaceURI === null && $node->name === 'class' && preg_replace(Data::WHITESPACE_REGEX, ' ', $node->value) !== $this->_classList->value) {
+            $this->_classList->value = $node->value;
+            return $this->getAttributeNode('class');
+        }
+
+        return parent::appendChild($node);
+    }
+
    public function getAttribute($name) {
        // Newer versions of the DOM spec have getAttribute return an empty string only
        // when the attribute exists and is empty, otherwise null. This fixes that.
@ -35,6 +51,8 @@ class Element extends \DOMElement {

    public function setAttribute($name, $value) {
        try {
+            // If setting a class attribute and classList has been invoked, use classList to
+            // set it.
            if ($this->_classList !== null && $name === 'class') {
                $this->_classList->value = $value;
            } else {
@ -55,7 +73,9 @@ class Element extends \DOMElement {

    public function setAttributeNS($namespaceURI, $qualifiedName, $value) {
        try {
-            if ($namespaceURI === null && $this->_classList !== null && $qualifiedName === 'class') {
+            // If setting a class attribute and classList has been invoked, use classList to
+            // set it.
+            if ($this->_classList !== null && $namespaceURI === null && $qualifiedName === 'class') {
                $this->_classList->value = $value;
            } else {
                parent::setAttributeNS($namespaceURI, $qualifiedName, $value);
--- a/lib/DOM/TokenList.php
+++ b/lib/DOM/TokenList.php
@ -47,7 +47,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {

            # 2. If token contains any ASCII whitespace, then throw an
            # "InvalidCharacterError" DOMException.
-            if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) {
+            if (preg_match(Data::WHITESPACE_REGEX, $token)) {
                throw new DOMException(DOMException::INVALID_CHARACTER);
            }
        }
@ -119,7 +119,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {

            # 2. If token contains any ASCII whitespace, then throw an
            # "InvalidCharacterError" DOMException.
-            if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) {
+            if (preg_match(Data::WHITESPACE_REGEX, $token)) {
                throw new DOMException(DOMException::INVALID_CHARACTER);
            }
        }
@ -151,7 +151,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {

        # 2. If either token or newToken contains any ASCII whitespace, then throw an
        # "InvalidCharacterError" DOMException.
-        if (preg_match(self::ASCII_WHITESPACE_REGEX, $token) || preg_match(self::ASCII_WHITESPACE_REGEX, $newToken)) {
+        if (preg_match(Data::WHITESPACE_REGEX, $token) || preg_match(Data::WHITESPACE_REGEX, $newToken)) {
            throw new DOMException(DOMException::INVALID_CHARACTER);
        }

@ -201,7 +201,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {

        # 2. If token contains any ASCII whitespace, then throw an
        # "InvalidCharacterError" DOMException.
-        if (preg_match(self::ASCII_WHITESPACE_REGEX, $token)) {
+        if (preg_match(Data::WHITESPACE_REGEX, $token)) {
            throw new DOMException(DOMException::INVALID_CHARACTER);
        }

@ -264,7 +264,7 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {
        #
        # 1. Let inputTokens be the result of splitting input on ASCII whitespace.
        // There isn't a Set in php, so make sure all the tokens are unique.
-        $inputTokens = array_unique(preg_split(self::ASCII_WHITESPACE_REGEX, $input));
+        $inputTokens = array_unique(preg_split(Data::WHITESPACE_REGEX, $input));

        # 2. Let tokens be a new ordered set.
        # 3. For each token in inputTokens, append token to tokens.
@ -274,11 +274,18 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {
    }

    protected function update() {
-        // Create the attribute using createAttribute because setAttribute has been
-        // extended to use TokenList when necessary.
+        # A DOMTokenList object’s update steps are:
+        #
+        # 1. If the associated element does not have an associated attribute and token
+        # set is empty, then return.
+        // Skipped: the class is constructed with its associated element and attribute.
+        // NOTE(review): an attribute is thus appended even when the token set is empty.
+
+        # 2. Set an attribute value for the associated element using associated
+        # attribute’s local name and the result of running the ordered set serializer
+        # for token set.
        $element = $this->element->get();
-        $doc = $element->ownerDocument;
-        $class = $doc->createAttribute($this->localName);
+        $class = $element->ownerDocument->createAttribute($this->localName);
        $class->value = $this->__toString();
        $element->appendChild($class);
    }
--- a/lib/Data.php
+++ b/lib/Data.php
@ -42,6 +42,7 @@ class Data {
    const DIGIT = '0123456789';
    const HEX = '0123456789ABCDEFabcdef';
    const WHITESPACE = "\t\n\x0C\x0D ";
+    const WHITESPACE_REGEX = '/[\t\n\x0c\x0D ]+/';
    const WHITESPACE_SAFE = "\t\x0C ";


@ -56,7 +57,7 @@ class Data {
        # User agents must use the following algorithm, called the encoding
        #   sniffing algorithm, to determine the character encoding to use
        #   when decoding a document in the first pass. This algorithm takes
-        #   as input any out-of-band metadata available to the user agent 
+        #   as input any out-of-band metadata available to the user agent
        #  (e.g. the Content-Type metadata of the document) and all the bytes
        #   available so far, and returns a character encoding and a confidence
        #   that is either tentative or certain.
@ -93,9 +94,9 @@ class Data {

    public function consume(): string {
        $char = $this->data->nextChar();
-        # Before the tokenization stage, the input stream must be 
+        # Before the tokenization stage, the input stream must be
        #   preprocessed by normalizing newlines.
-        # Thus, newlines in HTML DOMs are represented by U+000A LF characters, 
+        # Thus, newlines in HTML DOMs are represented by U+000A LF characters,
        #   and there are never any U+000D CR characters in the input to the tokenization stage.
        if ($char === "\r") {
            // if this is a CR+LF pair, skip the CR and note the normalization
@ -103,7 +104,7 @@ class Data {
                $char = $this->data->nextChar();
                $this->normalized[$this->data->posChar()] = true;
            }
-            // otherwise just silently change the character to LF; 
+            // otherwise just silently change the character to LF;
            // the bare CR will be trivial to process when seeking backwards
            else {
                $char = "\n";
@ -119,7 +120,7 @@ class Data {
                $this->eof = true;
            } else {
                $this->_column++;
-                $len = strlen($char);    
+                $len = strlen($char);
                $here = $this->data->posChar();
                if ($this->lastError < $here) {
                    // look for erroneous characters
@ -246,12 +247,12 @@ class Data {
                $line = $this->_line;
                $col = $this->_column;
                do {
-                    // If the current position is the start of a line, 
+                    // If the current position is the start of a line,
                    //  get the column position of the end of the previous line
                    if (isset($this->newlines[$pos])) {
                        $line--;
                        $col = $this->newlines[$pos];
-                        // If the newline was a normalized CR+LF pair, 
+                        // If the newline was a normalized CR+LF pair,
                        //  go back one extra character
                        if (isset($this->normalized[$pos])) {
                            $pos--;