diff --git a/composer.json b/composer.json index ec2948b..1790293 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "require": { "php": ">=7.4", "ext-dom": "*", - "mensbeam/html-parser": "dev-master", + "mensbeam/html-parser": ">=1.0", "mensbeam/framework": "^1.0" }, "scripts": { @@ -47,11 +47,5 @@ "bamarni/composer-bin-plugin": "^1.3", "daux/daux.io": "^0.16.0", "mikey179/vfsstream": "^1.6" - }, - "repositories": [ - { - "type": "git", - "url": "mensbeam-gitea:MensBeam/HTML-Parser.git" - } - ] + } } diff --git a/composer.lock b/composer.lock index ac560d6..6d3c5dd 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "fb8d8d217fab6f78a9bdf736c8da2316", + "content-hash": "e1451c0dbcfa654af5768e49e9d6ade6", "packages": [ { "name": "mensbeam/framework", @@ -44,11 +44,11 @@ }, { "name": "mensbeam/html-parser", - "version": "dev-master", + "version": "1.0", "source": { "type": "git", - "url": "mensbeam-gitea:MensBeam/HTML-Parser.git", - "reference": "7b9a6ca472fe8ffa6e82f09a8848fbb4e320bb79" + "url": "https://code.mensbeam.com/MensBeam/HTML-Parser", + "reference": "c06caaabab7350823b10246e49594a41cf50401d" }, "require": { "ext-dom": "*", @@ -57,13 +57,11 @@ "php": ">=7.1" }, "require-dev": { - "bamarni/composer-bin-plugin": "^1.3", - "masterminds/html5": "^2.7" + "bamarni/composer-bin-plugin": "^1.3" }, "suggest": { "ext-ctype": "Improved performance" }, - "default-branch": true, "type": "library", "autoload": { "psr-4": { @@ -78,20 +76,7 @@ "lib/Parser/ctype.php" ] }, - "autoload-dev": { - "psr-4": { - "MensBeam\\HTML\\Test\\": "tests/lib/", - "MensBeam\\HTML\\TestCase\\": "tests/cases/" - } - }, - "scripts": { - "post-install-cmd": [ - "@composer bin all install" - ], - "post-update-cmd": [ - "@composer bin all update" - ] - }, + "notification-url": "https://packagist.org/downloads/", "license": [ "MIT" ], @@ -108,7 +93,15 @@ } ], "description": "Parses modern HTML text into a PHP DOMDocument", - "time": "2021-10-11T19:35:44+00:00" + "keywords": [ + "HTML5", + "WHATWG", + "dom", + "html", + "parser", + "parsing" + ], + "time": "2021-10-12T00:23:16+00:00" }, { "name": "mensbeam/intl", @@ -2620,9 +2613,7 @@ ], "aliases": [], "minimum-stability": "stable", - "stability-flags": { - "mensbeam/html-parser": 20 - }, + "stability-flags": [], "prefer-stable": false, "prefer-lowest": false, "platform": { diff --git a/lib/Element.php b/lib/Element.php index 16b229c..c9e0612 100644 --- a/lib/Element.php +++ b/lib/Element.php @@ -15,15 +15,8 @@ class Element extends \DOMElement { use ChildNode, DocumentOrElement, MagicProperties, Moonwalk, ParentNode, ToString, Walk; - protected ?TokenList $_classList = null; - protected function __get_classList(): TokenList { - // Only create the class list if it is actually used. - if ($this->_classList === null) { - $this->_classList = new TokenList($this, 'class'); - } - - return $this->_classList; + return new TokenList($this, 'class'); } protected function __get_innerHTML(): string { @@ -153,7 +146,7 @@ class Element extends \DOMElement { # The getAttribute(qualifiedName) method steps are: # # 1. Let attr be the result of getting an attribute given qualifiedName and this. - $attr = $this->_getAttributeNode($qualifiedName); + $attr = $this->getAttributeNode($qualifiedName); # 2. If attr is null, return null. if ($attr === null) { return null; @@ -176,32 +169,72 @@ class Element extends \DOMElement { public function getAttributeNode(string $qualifiedName): ?Attr { # The getAttributeNode(qualifiedName) method steps are to return the result of # getting an attribute given qualifiedName and this. - $result = $this->_getAttributeNode($qualifiedName); - // More classlist bullshit. Since we cannot extend \DOMAttr in a way that will - // allow us to set the classList if a class attribute's value is modified we - // will instead remove the classList and force it to be recreated when a class - // attribute is requested. - if ($result !== null && $result->name === 'class') { - $this->_classList = null; + # + # To get an attribute by name given a qualifiedName and element element, run + # these steps: + # + # 1. If element is in the HTML namespace and its node document is an HTML document, + # then set qualifiedName to qualifiedName in ASCII lowercase. + // Document will always be an HTML document + if ($this->isHTMLNamespace()) { + $qualifiedName = strtolower($qualifiedName); } - return $result; + # 2. Return the first attribute in element’s attribute list whose qualified name is + # qualifiedName; otherwise null. + // Going to try to handle this by getting the PHP DOM to do the heavy lifting + // when we can because it's faster. + $attr = parent::getAttributeNode($qualifiedName); + if ($attr === false) { + // Replace any offending characters with "UHHHHHH" where H are the uppercase + // hexadecimal digits of the character's code point + $qualifiedName = $this->coerceName($qualifiedName); + + foreach ($this->attributes as $a) { + if ($a->nodeName === $qualifiedName) { + return $a; + } + } + return null; + } + + return ($attr !== false) ? $attr : null; } public function getAttributeNodeNS(?string $namespace = null, string $localName): ?Attr { # The getAttributeNodeNS(namespace, localName) method steps are to return the # result of getting an attribute given namespace, localName, and this. - $result = $this->_getAttributeNodeNS($namespace, $localName); - // More classlist bullshit. Since we cannot extend \DOMAttr in a way that will - // allow us to set the classList if a class attribute's value is modified we - // will instead remove the classList and force it to be recreated when a class - // attribute is requested. - if ($result !== null && $result->name === 'class') { - $this->_classList = null; - ElementMap::delete($this); + # + # To get an attribute by namespace and local name given a namespace, localName, + # and element element, run these steps: + # + # 1. If namespace is the empty string, then set it to null. + if ($namespace === '') { + $namespace = null; } - return $result; + # 2. Return the attribute in element’s attribute list whose namespace is namespace + # and local name is localName, if any; otherwise null. + // Going to try to handle this by getting the PHP DOM to do the heavy lifting + // when we can because it's faster. + $value = parent::getAttributeNodeNS($namespace, $localName); + if (!$value) { + // Replace any offending characters with "UHHHHHH" where H are the uppercase + // hexadecimal digits of the character's code point + $namespace = $this->coerceName($namespace ?? ''); + $localName = $this->coerceName($localName); + + // The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes + // sometimes, too... so this will get those as well in those circumstances. + foreach ($this->attributes as $a) { + if ($a->namespaceURI === $namespace && $a->localName === $localName) { + return $a; + } + } + return null; + } + + return ($value !== false) ? $value : null; } @@ -210,7 +243,7 @@ class Element extends \DOMElement { # # 1. Let attr be the result of getting an attribute given namespace, localName, # and this. - $attr = $this->_getAttributeNodeNS($namespace, $localName); + $attr = $this->getAttributeNodeNS($namespace, $localName); # 2. If attr is null, return null. if ($attr === null) { @@ -242,7 +275,7 @@ class Element extends \DOMElement { // The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes, // so try it again just in case; getAttributeNode will coerce names if // necessary, too. - $value = ($this->_getAttributeNode($qualifiedName) !== null); + $value = ($this->getAttributeNode($qualifiedName) !== null); } return $value; @@ -266,7 +299,7 @@ class Element extends \DOMElement { // The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes, // so try it again just in case; getAttributeNode will coerce names if // necessary, too. - $value = ($this->_getAttributeNodeNS($namespace, $localName) !== null); + $value = ($this->getAttributeNodeNS($namespace, $localName) !== null); } return $value; @@ -280,7 +313,7 @@ class Element extends \DOMElement { ## these steps: ## ## 1. Let attr be the result of getting an attribute given qualifiedName and element. - $attr = $this->_getAttributeNode($qualifiedName); + $attr = $this->getAttributeNode($qualifiedName); ## 2. If attr is non-null, then remove attr. if ($attr !== null) { // Going to try to handle this by getting the PHP DOM to do the heavy lifting @@ -288,8 +321,7 @@ class Element extends \DOMElement { parent::removeAttributeNode($attr); // ClassList stuff because php garbage collection is... garbage. - if ($qualifiedName === 'class' && $this->_classList !== null) { - $this->_classList = null; + if ($qualifiedName === 'class') { ElementMap::delete($this); } } @@ -304,12 +336,17 @@ class Element extends \DOMElement { ## To remove an attribute by namespace and local name given a namespace, localName, and element element, run these steps: ## ## 1. Let attr be the result of getting an attribute given namespace, localName, and element. - $attr = $this->_getAttributeNodeNS($namespace, $localName); + $attr = $this->getAttributeNodeNS($namespace, $localName); ## 2. If attr is non-null, then remove attr. if ($attr !== null) { // Going to try to handle this by getting the PHP DOM to do the heavy lifting // when we can because it's faster. parent::removeAttributeNode($attr); + + // ClassList stuff because php garbage collection is... garbage. + if ($qualifiedName === 'class') { + ElementMap::delete($this); + } } ## 3. Return attr. // Supposed to return undefined in the end, so let's skip this. @@ -336,18 +373,11 @@ class Element extends \DOMElement { # attribute to this, and then return. # 5. Change attribute to value. // Going to try to handle this by getting the PHP DOM to do the heavy lifting - // when we can because it's faster. But, first, we must work around PHP's - // garbage garbage collection. - if ($qualifiedName === 'class' && $this->_classList !== null) { - if ($value !== '') { - $this->_classList->value = $value; - return; - } else { - $this->_classList = null; - ElementMap::delete($this); - } + // when we can because it's faster. + // ClassList stuff because php garbage collection is... garbage. + if ($qualifiedName === 'class' && $value === '') { + ElementMap::delete($this); } - try { parent::setAttributeNS(null, $qualifiedName, $value); } catch (\DOMException $e) { @@ -362,6 +392,16 @@ class Element extends \DOMElement { if ($qualifiedName === 'id' && $namespaceURI === null) { $this->setIdAttribute($qualifiedName, true); } + + // ClassList stuff because php garbage collection is... garbage. + if ($qualifiedName === 'class') { + ElementMap::delete($this); + } + // If you create an id attribute this way it won't be used by PHP in + // getElementById, so let's fix that. + elseif ($qualifiedName === 'id') { + $this->setIdAttribute($qualifiedName, true); + } } public function setAttributeNS(?string $namespace, string $qualifiedName, string $value): void { @@ -373,18 +413,7 @@ class Element extends \DOMElement { # 2. Set an attribute value for this using localName, value, and also prefix and # namespace. // Going to try to handle this by getting the PHP DOM to do the heavy lifting - // when we can because it's faster. But, first, we must work around a couple of - // PHP bugs and its garbage garbage collection. - if ($qualifiedName === 'class' && $this->_classList !== null) { - if ($value !== '') { - $this->_classList->value = $value; - return; - } else { - $this->_classList = null; - ElementMap::delete($this); - } - } - + // when we can because it's faster. if ($namespace === Parser::XMLNS_NAMESPACE) { // NOTE: We create attribute nodes so that xmlns attributes // don't get lost; otherwise they cannot be serialized @@ -414,74 +443,16 @@ class Element extends \DOMElement { } } - if ($qualifiedName === 'id' && $namespaceURI === null) { - $this->setIdAttribute($qualifiedName, true); - } - } - - - protected function _getAttributeNode(string $qualifiedName): ?Attr { - # To get an attribute by name given a qualifiedName and element element, run - # these steps: - # - # 1. If element is in the HTML namespace and its node document is an HTML document, - # then set qualifiedName to qualifiedName in ASCII lowercase. - // Document will always be an HTML document - if ($this->isHTMLNamespace()) { - $qualifiedName = strtolower($qualifiedName); - } - - # 2. Return the first attribute in element’s attribute list whose qualified name is - # qualifiedName; otherwise null. - // Going to try to handle this by getting the PHP DOM to do the heavy lifting - // when we can because it's faster. - $attr = parent::getAttributeNode($qualifiedName); - if ($attr === false) { - // Replace any offending characters with "UHHHHHH" where H are the uppercase - // hexadecimal digits of the character's code point - $qualifiedName = $this->coerceName($qualifiedName); - - foreach ($this->attributes as $a) { - if ($a->nodeName === $qualifiedName) { - return $a; - } + if ($namespace === null) { + // ClassList stuff because php garbage collection is... garbage. + if ($qualifiedName === 'class') { + ElementMap::delete($this); } - return null; - } - - return ($attr !== false) ? $attr : null; - } - - protected function _getAttributeNodeNS(?string $namespace = null, string $localName): ?Attr { - # To get an attribute by namespace and local name given a namespace, localName, - # and element element, run these steps: - # - # 1. If namespace is the empty string, then set it to null. - if ($namespace === '') { - $namespace = null; - } - - # 2. Return the attribute in element’s attribute list whose namespace is namespace - # and local name is localName, if any; otherwise null. - // Going to try to handle this by getting the PHP DOM to do the heavy lifting - // when we can because it's faster. - $value = parent::getAttributeNodeNS($namespace, $localName); - if (!$value) { - // Replace any offending characters with "UHHHHHH" where H are the uppercase - // hexadecimal digits of the character's code point - $namespace = $this->coerceName($namespace ?? ''); - $localName = $this->coerceName($localName); - - // The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes - // sometimes, too... so this will get those as well in those circumstances. - foreach ($this->attributes as $a) { - if ($a->namespaceURI === $namespace && $a->localName === $localName) { - return $a; - } + // If you create an id attribute this way it won't be used by PHP in + // getElementById, so let's fix that. + elseif ($qualifiedName === 'id') { + $this->setIdAttribute($qualifiedName, true); } - return null; } - - return ($value !== false) ? $value : null; } } diff --git a/lib/TokenList.php b/lib/TokenList.php index 1b8fa17..bf6766d 100644 --- a/lib/TokenList.php +++ b/lib/TokenList.php @@ -52,6 +52,10 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator { # 1. Let element be associated element. // Using a weak reference here to prevent a circular reference. $this->element = \WeakReference::create($element); + // Store the element somewhere statically because PHP's garbage collection is + // itself garbage. This seems to contradict using a WeakReference, and it does. + // However, it simply doesn't work otherwise because PHP does reference counting + // for garbage collection. Attempts are made elsewhere to garbage collect. ElementMap::add($element); # 2. Let localName be associated attribute’s local name. $this->localName = $attributeLocalName; diff --git a/lib/traits/ParentNode.php b/lib/traits/ParentNode.php index 6da3bf2..9338b1d 100644 --- a/lib/traits/ParentNode.php +++ b/lib/traits/ParentNode.php @@ -30,7 +30,7 @@ trait ParentNode { $this->preInsertionValidity($node); $result = parent::appendChild($node); - if ($result !== false && $result instanceof HTMLTemplateElement) { + if ($result !== false && $node instanceof HTMLTemplateElement) { ElementMap::add($node); } return $node; @@ -48,7 +48,7 @@ trait ParentNode { public function removeChild($child) { $result = parent::removeChild($child); - if ($result !== false && $child instanceof HTMLTemplateElement) { + if ($result !== false && $child instanceof Element) { ElementMap::delete($child); } return $child; @@ -61,7 +61,7 @@ trait ParentNode { if ($node instanceof HTMLTemplateElement) { ElementMap::add($node); } - if ($child instanceof HTMLTemplateElement) { + if ($child instanceof Element) { ElementMap::delete($child); } }