Browse Source

Manual garbage collection when using class lists

wrapper-classes
Dustin Wilson 3 years ago
parent
commit
9fe14c1af1
  1. 10
      composer.json
  2. 41
      composer.lock
  3. 215
      lib/Element.php
  4. 4
      lib/TokenList.php
  5. 6
      lib/traits/ParentNode.php

10
composer.json

@ -5,7 +5,7 @@
"require": {
"php": ">=7.4",
"ext-dom": "*",
"mensbeam/html-parser": "dev-master",
"mensbeam/html-parser": ">=1.0",
"mensbeam/framework": "^1.0"
},
"scripts": {
@ -47,11 +47,5 @@
"bamarni/composer-bin-plugin": "^1.3",
"daux/daux.io": "^0.16.0",
"mikey179/vfsstream": "^1.6"
},
"repositories": [
{
"type": "git",
"url": "mensbeam-gitea:MensBeam/HTML-Parser.git"
}
]
}
}

41
composer.lock

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "fb8d8d217fab6f78a9bdf736c8da2316",
"content-hash": "e1451c0dbcfa654af5768e49e9d6ade6",
"packages": [
{
"name": "mensbeam/framework",
@ -44,11 +44,11 @@
},
{
"name": "mensbeam/html-parser",
"version": "dev-master",
"version": "1.0",
"source": {
"type": "git",
"url": "mensbeam-gitea:MensBeam/HTML-Parser.git",
"reference": "7b9a6ca472fe8ffa6e82f09a8848fbb4e320bb79"
"url": "https://code.mensbeam.com/MensBeam/HTML-Parser",
"reference": "c06caaabab7350823b10246e49594a41cf50401d"
},
"require": {
"ext-dom": "*",
@ -57,13 +57,11 @@
"php": ">=7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"masterminds/html5": "^2.7"
"bamarni/composer-bin-plugin": "^1.3"
},
"suggest": {
"ext-ctype": "Improved performance"
},
"default-branch": true,
"type": "library",
"autoload": {
"psr-4": {
@ -78,20 +76,7 @@
"lib/Parser/ctype.php"
]
},
"autoload-dev": {
"psr-4": {
"MensBeam\\HTML\\Test\\": "tests/lib/",
"MensBeam\\HTML\\TestCase\\": "tests/cases/"
}
},
"scripts": {
"post-install-cmd": [
"@composer bin all install"
],
"post-update-cmd": [
"@composer bin all update"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
@ -108,7 +93,15 @@
}
],
"description": "Parses modern HTML text into a PHP DOMDocument",
"time": "2021-10-11T19:35:44+00:00"
"keywords": [
"HTML5",
"WHATWG",
"dom",
"html",
"parser",
"parsing"
],
"time": "2021-10-12T00:23:16+00:00"
},
{
"name": "mensbeam/intl",
@ -2620,9 +2613,7 @@
],
"aliases": [],
"minimum-stability": "stable",
"stability-flags": {
"mensbeam/html-parser": 20
},
"stability-flags": [],
"prefer-stable": false,
"prefer-lowest": false,
"platform": {

215
lib/Element.php

@ -15,15 +15,8 @@ class Element extends \DOMElement {
use ChildNode, DocumentOrElement, MagicProperties, Moonwalk, ParentNode, ToString, Walk;
protected ?TokenList $_classList = null;
protected function __get_classList(): TokenList {
// Only create the class list if it is actually used.
if ($this->_classList === null) {
$this->_classList = new TokenList($this, 'class');
}
return $this->_classList;
return new TokenList($this, 'class');
}
protected function __get_innerHTML(): string {
@ -153,7 +146,7 @@ class Element extends \DOMElement {
# The getAttribute(qualifiedName) method steps are:
#
# 1. Let attr be the result of getting an attribute given qualifiedName and this.
$attr = $this->_getAttributeNode($qualifiedName);
$attr = $this->getAttributeNode($qualifiedName);
# 2. If attr is null, return null.
if ($attr === null) {
return null;
@ -176,32 +169,72 @@ class Element extends \DOMElement {
public function getAttributeNode(string $qualifiedName): ?Attr {
# The getAttributeNode(qualifiedName) method steps are to return the result of
# getting an attribute given qualifiedName and this.
$result = $this->_getAttributeNode($qualifiedName);
// More classlist bullshit. Since we cannot extend \DOMAttr in a way that will
// allow us to set the classList if a class attribute's value is modified we
// will instead remove the classList and force it to be recreated when a class
// attribute is requested.
if ($result !== null && $result->name === 'class') {
$this->_classList = null;
#
# To get an attribute by name given a qualifiedName and element element, run
# these steps:
#
# 1. If element is in the HTML namespace and its node document is an HTML document,
# then set qualifiedName to qualifiedName in ASCII lowercase.
// Document will always be an HTML document
if ($this->isHTMLNamespace()) {
$qualifiedName = strtolower($qualifiedName);
}
return $result;
# 2. Return the first attribute in element’s attribute list whose qualified name is
# qualifiedName; otherwise null.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$attr = parent::getAttributeNode($qualifiedName);
if ($attr === false) {
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
$qualifiedName = $this->coerceName($qualifiedName);
foreach ($this->attributes as $a) {
if ($a->nodeName === $qualifiedName) {
return $a;
}
}
return null;
}
return ($attr !== false) ? $attr : null;
}
public function getAttributeNodeNS(?string $namespace = null, string $localName): ?Attr {
# The getAttributeNodeNS(namespace, localName) method steps are to return the
# result of getting an attribute given namespace, localName, and this.
$result = $this->_getAttributeNodeNS($namespace, $localName);
// More classlist bullshit. Since we cannot extend \DOMAttr in a way that will
// allow us to set the classList if a class attribute's value is modified we
// will instead remove the classList and force it to be recreated when a class
// attribute is requested.
if ($result !== null && $result->name === 'class') {
$this->_classList = null;
ElementMap::delete($this);
#
# To get an attribute by namespace and local name given a namespace, localName,
# and element element, run these steps:
#
# 1. If namespace is the empty string, then set it to null.
if ($namespace === '') {
$namespace = null;
}
return $result;
# 2. Return the attribute in element’s attribute list whose namespace is namespace
# and local name is localName, if any; otherwise null.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$value = parent::getAttributeNodeNS($namespace, $localName);
if (!$value) {
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
$namespace = $this->coerceName($namespace ?? '');
$localName = $this->coerceName($localName);
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes
// sometimes, too... so this will get those as well in those circumstances.
foreach ($this->attributes as $a) {
if ($a->namespaceURI === $namespace && $a->localName === $localName) {
return $a;
}
}
return null;
}
return ($value !== false) ? $value : null;
}
@ -210,7 +243,7 @@ class Element extends \DOMElement {
#
# 1. Let attr be the result of getting an attribute given namespace, localName,
# and this.
$attr = $this->_getAttributeNodeNS($namespace, $localName);
$attr = $this->getAttributeNodeNS($namespace, $localName);
# 2. If attr is null, return null.
if ($attr === null) {
@ -242,7 +275,7 @@ class Element extends \DOMElement {
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes,
// so try it again just in case; getAttributeNode will coerce names if
// necessary, too.
$value = ($this->_getAttributeNode($qualifiedName) !== null);
$value = ($this->getAttributeNode($qualifiedName) !== null);
}
return $value;
@ -266,7 +299,7 @@ class Element extends \DOMElement {
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes,
// so try it again just in case; getAttributeNode will coerce names if
// necessary, too.
$value = ($this->_getAttributeNodeNS($namespace, $localName) !== null);
$value = ($this->getAttributeNodeNS($namespace, $localName) !== null);
}
return $value;
@ -280,7 +313,7 @@ class Element extends \DOMElement {
## these steps:
##
## 1. Let attr be the result of getting an attribute given qualifiedName and element.
$attr = $this->_getAttributeNode($qualifiedName);
$attr = $this->getAttributeNode($qualifiedName);
## 2. If attr is non-null, then remove attr.
if ($attr !== null) {
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
@ -288,8 +321,7 @@ class Element extends \DOMElement {
parent::removeAttributeNode($attr);
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class' && $this->_classList !== null) {
$this->_classList = null;
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
}
@ -304,12 +336,17 @@ class Element extends \DOMElement {
## To remove an attribute by namespace and local name given a namespace, localName, and element element, run these steps:
##
## 1. Let attr be the result of getting an attribute given namespace, localName, and element.
$attr = $this->_getAttributeNodeNS($namespace, $localName);
$attr = $this->getAttributeNodeNS($namespace, $localName);
## 2. If attr is non-null, then remove attr.
if ($attr !== null) {
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
parent::removeAttributeNode($attr);
// Drop this element from the static ElementMap once its class attribute has
// been removed so that it can be garbage collected. This method works with
// $namespace and $localName ($qualifiedName is not defined in the visible part
// of it), so the class attribute is identified by those instead. An empty
// string namespace is treated the same as null, as getAttributeNodeNS does.
if (($namespace === null || $namespace === '') && $localName === 'class') {
    ElementMap::delete($this);
}
}
## 3. Return attr.
// Supposed to return undefined in the end, so let's skip this.
@ -336,18 +373,11 @@ class Element extends \DOMElement {
# attribute to this, and then return.
# 5. Change attribute to value.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster. But, first, we must work around PHP's
// garbage garbage collection.
if ($qualifiedName === 'class' && $this->_classList !== null) {
if ($value !== '') {
$this->_classList->value = $value;
return;
} else {
$this->_classList = null;
ElementMap::delete($this);
}
// when we can because it's faster.
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class' && $value === '') {
ElementMap::delete($this);
}
try {
parent::setAttributeNS(null, $qualifiedName, $value);
} catch (\DOMException $e) {
@ -362,6 +392,16 @@ class Element extends \DOMElement {
if ($qualifiedName === 'id' && $namespaceURI === null) {
$this->setIdAttribute($qualifiedName, true);
}
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
// If you create an id attribute this way it won't be used by PHP in
// getElementById, so let's fix that.
elseif ($qualifiedName === 'id') {
$this->setIdAttribute($qualifiedName, true);
}
}
public function setAttributeNS(?string $namespace, string $qualifiedName, string $value): void {
@ -373,18 +413,7 @@ class Element extends \DOMElement {
# 2. Set an attribute value for this using localName, value, and also prefix and
# namespace.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster. But, first, we must work around a couple of
// PHP bugs and its garbage garbage collection.
if ($qualifiedName === 'class' && $this->_classList !== null) {
if ($value !== '') {
$this->_classList->value = $value;
return;
} else {
$this->_classList = null;
ElementMap::delete($this);
}
}
// when we can because it's faster.
if ($namespace === Parser::XMLNS_NAMESPACE) {
// NOTE: We create attribute nodes so that xmlns attributes
// don't get lost; otherwise they cannot be serialized
@ -414,74 +443,16 @@ class Element extends \DOMElement {
}
}
if ($qualifiedName === 'id' && $namespaceURI === null) {
$this->setIdAttribute($qualifiedName, true);
}
}
protected function _getAttributeNode(string $qualifiedName): ?Attr {
# To get an attribute by name given a qualifiedName and element element, run
# these steps:
#
# 1. If element is in the HTML namespace and its node document is an HTML document,
# then set qualifiedName to qualifiedName in ASCII lowercase.
// Document will always be an HTML document
if ($this->isHTMLNamespace()) {
$qualifiedName = strtolower($qualifiedName);
}
# 2. Return the first attribute in element’s attribute list whose qualified name is
# qualifiedName; otherwise null.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$attr = parent::getAttributeNode($qualifiedName);
if ($attr === false) {
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
$qualifiedName = $this->coerceName($qualifiedName);
foreach ($this->attributes as $a) {
if ($a->nodeName === $qualifiedName) {
return $a;
}
if ($namespace === null) {
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
return null;
}
return ($attr !== false) ? $attr : null;
}
protected function _getAttributeNodeNS(?string $namespace = null, string $localName): ?Attr {
# To get an attribute by namespace and local name given a namespace, localName,
# and element element, run these steps:
#
# 1. If namespace is the empty string, then set it to null.
if ($namespace === '') {
$namespace = null;
}
# 2. Return the attribute in element’s attribute list whose namespace is namespace
# and local name is localName, if any; otherwise null.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$value = parent::getAttributeNodeNS($namespace, $localName);
if (!$value) {
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
$namespace = $this->coerceName($namespace ?? '');
$localName = $this->coerceName($localName);
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes
// sometimes, too... so this will get those as well in those circumstances.
foreach ($this->attributes as $a) {
if ($a->namespaceURI === $namespace && $a->localName === $localName) {
return $a;
}
// If you create an id attribute this way it won't be used by PHP in
// getElementById, so let's fix that.
elseif ($qualifiedName === 'id') {
$this->setIdAttribute($qualifiedName, true);
}
return null;
}
return ($value !== false) ? $value : null;
}
}

4
lib/TokenList.php

@ -52,6 +52,10 @@ class TokenList implements \ArrayAccess, \Countable, \Iterator {
# 1. Let element be associated element.
// Using a weak reference here to prevent a circular reference.
$this->element = \WeakReference::create($element);
// Store the element statically (in ElementMap). This does contradict using a
// WeakReference above, but it is deliberate: PHP does reference counting for
// garbage collection, and the token list simply doesn't work without the
// element being stored this way. Because the element is stored statically it
// is not collected automatically; attempts are made elsewhere to remove it
// manually (ElementMap::delete) so that it can be garbage collected.
ElementMap::add($element);
# 2. Let localName be associated attribute’s local name.
$this->localName = $attributeLocalName;

6
lib/traits/ParentNode.php

@ -30,7 +30,7 @@ trait ParentNode {
$this->preInsertionValidity($node);
$result = parent::appendChild($node);
if ($result !== false && $result instanceof HTMLTemplateElement) {
if ($result !== false && $node instanceof HTMLTemplateElement) {
ElementMap::add($node);
}
return $node;
@ -48,7 +48,7 @@ trait ParentNode {
public function removeChild($child) {
$result = parent::removeChild($child);
if ($result !== false && $child instanceof HTMLTemplateElement) {
if ($result !== false && $child instanceof Element) {
ElementMap::delete($child);
}
return $child;
@ -61,7 +61,7 @@ trait ParentNode {
if ($node instanceof HTMLTemplateElement) {
ElementMap::add($node);
}
if ($child instanceof HTMLTemplateElement) {
if ($child instanceof Element) {
ElementMap::delete($child);
}
}

Loading…
Cancel
Save