Browse Source

Rewrote Document::createElement and Document::createElementNS

• Both now more closely follow the spec
wrapper-classes
Dustin Wilson 3 years ago
parent
commit
d093e43066
  1. 6
      composer.json
  2. 25
      composer.lock
  3. 10
      lib/DOMException.php
  4. 133
      lib/Document.php
  5. 10
      lib/ElementSet.php
  6. 4
      lib/HTMLTemplateElement.php
  7. 89
      tests/cases/TestDocument.php
  8. 2
      vendor-bin/phpunit/composer.lock
  9. 2
      vendor-bin/robo/composer.lock

6
composer.json

@ -38,6 +38,12 @@
"MensBeam\\HTML\\DOM\\TestCase\\": "tests/cases/"
}
},
"repositories": [
{
"type": "git",
"url": "mensbeam-gitea:MensBeam/HTML-Parser.git"
}
],
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"daux/daux.io": "^0.16.0"

25
composer.lock

@ -4,15 +4,15 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "1077b4324237a83eab2f418e6eb3f2d4",
"content-hash": "fa97f8577cdf49f5f1dc348f28cf141b",
"packages": [
{
"name": "mensbeam/html-parser",
"version": "dev-master",
"source": {
"type": "git",
"url": "https://code.mensbeam.com/MensBeam/HTML-Parser",
"reference": "49764a456743ac69074b4f64619cf87af254c071"
"url": "mensbeam-gitea:MensBeam/HTML-Parser.git",
"reference": "c6c3f725d09f4162fb58c6943f980af34c7435b2"
},
"require": {
"ext-dom": "*",
@ -42,7 +42,20 @@
"lib/Parser/ctype.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"autoload-dev": {
"psr-4": {
"MensBeam\\HTML\\Test\\": "tests/lib/",
"MensBeam\\HTML\\TestCase\\": "tests/cases/"
}
},
"scripts": {
"post-install-cmd": [
"@composer bin all install"
],
"post-update-cmd": [
"@composer bin all update"
]
},
"license": [
"MIT"
],
@ -59,7 +72,7 @@
}
],
"description": "Parses modern HTML text into a PHP DOMDocument",
"time": "2021-09-28T20:15:07+00:00"
"time": "2021-09-29T22:39:46+00:00"
},
{
"name": "mensbeam/intl",
@ -2466,5 +2479,5 @@
"ext-dom": "*"
},
"platform-dev": [],
"plugin-api-version": "2.1.0"
"plugin-api-version": "2.0.0"
}

10
lib/DOMException.php

@ -15,7 +15,12 @@ class DOMException extends \Exception {
const INVALID_CHARACTER = 5;
const NO_MODIFICATION_ALLOWED = 7;
const NOT_FOUND = 8;
const NOT_SUPPORTED = 9;
const SYNTAX_ERROR = 12;
const INVALID_MODIFICATION_ERROR = 13;
const NAMESPACE_ERROR = 14;
const INVALID_ACCESS_ERROR = 15;
const VALIDATION_ERROR = 16;
const OUTER_HTML_FAILED_NOPARENT = 101;
@ -26,7 +31,12 @@ class DOMException extends \Exception {
5 => 'Invalid character',
7 => 'Modification not allowed here',
8 => 'Not found error',
9 => 'Feature is not supported because %s',
12 => 'Syntax error',
13 => 'Invalid modification error',
14 => 'Namespace error',
15 => 'Invalid access error',
16 => 'Validation error',
101 => 'Failed to set the "outerHTML" property; the element does not have a parent node'
];

133
lib/Document.php

@ -177,37 +177,144 @@ class Document extends AbstractDocument {
}
}
public function createElement($name, $value = "") {
return $this->createElementNS(null, $name, $value);
public function createElement($name, $value = null): Element {
# The createElement(localName, options) method steps are:
// DEVIATION: We cannot follow the createElement parameters per the DOM spec
// because we cannot change the parameters from \DOMDOcument. This is okay
// because $options is currently just for the is attribute for custom elements.
// Since this implementation does not have support for scripting that argument
// would be useless anyway. Equally, the $value parameter from PHP's DOM is
// useless, so it is disabled in this implementation as it doesn't exist in the
// DOM spec.
if ($value !== null) {
throw new DOMException(DOMException::NOT_SUPPORTED, 'the value parameter is not in the official DOM specification; create a text node and append instead');
}
# 1. If localName does not match the Name production, then throw an
# "InvalidCharacterError" DOMException.
if (preg_match('/^[:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/u', $name) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
# 2. If this is an HTML document, then set localName to localName in ASCII
# lowercase.
// This will always be an HTML document
$name = strtolower($name);
# 3. Let is be null.
# 4. If options is a dictionary and options["is"] exists, then set is to it.
# 5. Let namespace be the HTML namespace, if this is an HTML document or this’s
# content type is "application/xhtml+xml"; otherwise null.
# 6. Return the result of creating an element given this, localName, namespace,
# null, is, and with the synchronous custom elements flag set.
// DEVIATION: There is no scripting in this implementation.
try {
if ($name !== 'template') {
$e = parent::createElement($name);
} else {
$e = new HTMLTemplateElement($this, $name);
}
return $e;
} catch (\DOMException $e) {
// The element name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
return parent::createElement($this->coerceName($name));
}
}
public function createElementNS($namespaceURI, $qualifiedName, $value = "") {
// Normalize the element name and namespace URI per modern DOM specifications.
if ($namespaceURI !== null) {
$namespaceURI = trim($namespaceURI);
$namespaceURI = ($namespaceURI === Parser::HTML_NAMESPACE) ? null : $namespaceURI;
public function createElementNS($namespaceURI, $qualifiedName, $value = null): Element {
# The internal createElementNS steps, given document, namespace, qualifiedName,
# and options, are as follows:
// DEVIATION: We cannot follow the createElement parameters per the DOM spec
// because we cannot change the parameters from \DOMDOcument. This is okay
// because $options is currently just for the is attribute for custom elements.
// Since this implementation does not have support for scripting that argument
// would be useless anyway. Equally, the $value parameter from PHP's DOM is
// useless, so it is disabled in this implementation as it doesn't exist in the
// DOM spec.
if ($value !== null) {
throw new DOMException(DOMException::NOT_SUPPORTED, 'the value parameter is not in the official DOM specification; create a text node and append instead');
}
$qualifiedName = ($namespaceURI === null) ? strtolower(trim($qualifiedName)) : trim($qualifiedName);
# 1. Let namespace, prefix, and localName be the result of passing namespace and
# qualifiedName to validate and extract.
## To validate and extract a namespace and qualifiedName, run these steps:
## 1. If namespace is the empty string, set it to null.
if ($namespaceURI === '') {
$namespaceURI = null;
}
## 2. Validate qualifiedName.
### To validate a qualifiedName, throw an "InvalidCharacterError" DOMException if
### qualifiedName does not match the QName production.
if (preg_match('/^([A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*:)?[A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/u', $qualifiedName) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
## 3. Let prefix be null.
$prefix = null;
## 4. Let localName be qualifiedName.
$localName = $qualifiedName;
## 5. If qualifiedName contains a ":" (U+003E), then split the string on it and
## set prefix to the part before and localName to the part after.
if (strpos($qualifiedName, ':') !== false) {
$temp = explode(':', $qualifiedName, 2);
$prefix = $temp[0];
$prefix = ($prefix !== '') ? $prefix : null;
$localName = $temp[1];
}
## 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
## 7. If prefix is "xml" and namespace is not the XML namespace, then throw a "NamespaceError" DOMException.
## 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS
## namespace, then throw a "NamespaceError" DOMException.
## 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is
## "xmlns", then throw a "NamespaceError" DOMException.
if (
($prefix !== null && $namespaceURI === null) ||
($prefix === 'xml' && $namespaceURI !== Parser::XML_NAMESPACE) ||
(($qualifiedName === 'xmlns' || $prefix === 'xmlns') && $namespaceURI !== Parser::XMLNS_NAMESPACE) ||
($namespaceURI === Parser::XMLNS_NAMESPACE && $qualifiedName !== 'xmlns' && $prefix !== 'xmlns')
) {
throw new DOMException(DOMException::NAMESPACE_ERROR);
}
## 10. Return namespace, prefix, and localName.
// Right-o.
# 2. Let is be null.
# 3. If options is a dictionary and options["is"] exists, then set is to it.
# 4. Return the result of creating an element given document, localName, namespace,
# prefix, is, and with the synchronous custom elements flag set.
// DEVIATION: There is no scripting in this implementation.
try {
if ($qualifiedName !== 'template' || $namespaceURI !== null) {
$e = parent::createElementNS($namespaceURI, $qualifiedName, $value);
$e = parent::createElementNS($namespaceURI, $qualifiedName);
} else {
$e = new HTMLTemplateElement($this, $qualifiedName, $value);
$e = new HTMLTemplateElement($this, $qualifiedName);
}
return $e;
} catch (\DOMException $e) {
// The element name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
// uppercase hexadecimal digits of the character's code point
$this->mangledElements = true;
if ($namespaceURI !== null) {
$qualifiedName = implode(":", array_map([$this, "coerceName"], explode(":", $qualifiedName, 2)));
$qualifiedName = implode(':', array_map([ $this, 'coerceName' ], explode(':', $qualifiedName, 2)));
} else {
$qualifiedName = $this->coerceName($qualifiedName);
}
return parent::createElementNS($namespaceURI, $qualifiedName, $value);
return parent::createElementNS($namespaceURI, $qualifiedName);
}
}

10
lib/ElementSet.php

@ -8,10 +8,12 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
// This is a write-only set of elements which need to be kept in memory; it
// exists because values of properties on derived DOM classes are lost unless at
// least one PHP reference is kept for the element somewhere in userspace. This
// is that somewhere. It is at present only used for template elements.
// This is a set of elements which need to be kept in memory; it exists because
// of the peculiar way PHP works. Derived DOM classes (such as
// HTMLTemplateElement) won't remain as such in the DOM (meaning they will
// revert to being what is registered for elements in Document) unless at least
// one reference is kept for the element somewhere in userspace. This is that
// somewhere.
class ElementSet {
protected static $_storage = [];

4
lib/HTMLTemplateElement.php

@ -12,8 +12,8 @@ namespace MensBeam\HTML\DOM;
class HTMLTemplateElement extends Element {
public $content = null;
public function __construct(Document $ownerDocument, string $qualifiedName, ?string $value = null, string $namespace = '') {
parent::__construct($qualifiedName, $value, $namespace);
public function __construct(Document $ownerDocument, string $qualifiedName, ?string $namespace = null) {
parent::__construct($qualifiedName, null, $namespace);
// Elements that are created by their constructor in PHP aren't owned by any
// document and are readonly until owned by one. Temporarily append to a

89
tests/cases/TestDocument.php

@ -17,6 +17,39 @@ use MensBeam\HTML\Parser;
class TestDocument extends \PHPUnit\Framework\TestCase {
public function provideAttributeNodes(): iterable {
return [
[null, 'test', 'test', ''],
[null, 'test:test', 'testU00003Atest', ''],
[null, 'test', 'test', ''],
[null, 'TEST:TEST', 'TESTU00003ATEST', ''],
['fake_ns', 'test', 'test', ''],
['fake_ns', 'test:test', 'test', 'test'],
['fake_ns', 'TEST:TEST', 'TEST', 'TEST'],
['fake_ns', 'test:test:test', 'testU00003Atest', 'test'],
['fake_ns', 'TEST:TEST:TEST', 'TESTU00003ATEST', 'TEST'],
];
}
/**
* @dataProvider provideAttributeNodes
* @covers \MensBeam\HTML\DOM\Document::createAttribute
* @covers \MensBeam\HTML\DOM\Document::createAttributeNS
*/
public function testAttributeNodeCreation(?string $nsIn, string $nameIn, string $local, string $prefix): void {
$d = new Document();
$d->appendChild($d->createElement('html'));
$a = $d->createAttributeNS($nsIn, $nameIn);
$this->assertSame($local, $a->localName);
$this->assertSame($nsIn, $a->namespaceURI);
$this->assertSame($prefix, $a->prefix);
$d = new Document();
$d->appendChild($d->createElement('html'));
$a = $d->createAttribute($nameIn);
$this->assertSame(($prefix !== '') ? "{$prefix}U00003A{$local}" : $local, $a->nodeName);
}
/**
* @covers \MensBeam\HTML\DOM\Document::__construct
* @covers \MensBeam\HTML\DOM\Document::loadDOM
@ -39,4 +72,60 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
$this->assertSame('MensBeam\HTML\DOM\Element', $d->firstChild::class);
$this->assertSame('html', $d->firstChild->nodeName);
}
public function provideElements(): iterable {
return [
// HTML element
[ '', null, 'div', 'div', Element::class ],
// HTML element and having to normalize local name
[ '', null, ' DIV', 'div', Element::class ],
// HTML element with a null namespace
[ null, null, 'div', 'div', Element::class ],
// HTML element with a null namespace and having to normalize local name
[ null, null, 'DIV ', 'div', Element::class ],
// HTML element with an HTML namespace
[ Parser::HTML_NAMESPACE, null, 'div', 'div', Element::class ],
// HTML element with an HTML namespace and having to normalize local name
[ Parser::HTML_NAMESPACE, null, ' DIV ', 'div', Element::class ],
// HTML element with an HTML namespace and having to normalize namespace URI &
// local name
[ strtoupper(Parser::HTML_NAMESPACE), null, ' DIV ', 'div', Element::class ],
// Template element
[ '', null, 'template', 'template', HTMLTemplateElement::class ],
// Template element and having to normalize local name
[ '', null, ' TEMPLATE', 'template', HTMLTemplateElement::class ],
// Template element with a null namespace
[ null, null, 'template', 'template', HTMLTemplateElement::class ],
// Template element with a null namespace and having to normalize local name
[ null, null, 'TEMPLATE ', 'template', HTMLTemplateElement::class ],
// Template element with an HTML namespace
[ Parser::HTML_NAMESPACE, null, 'template', 'template', HTMLTemplateElement::class ],
// Template element with an HTML namespace and having to normalize local name
[ Parser::HTML_NAMESPACE, null, ' TEMPLATE ', 'template', HTMLTemplateElement::class ],
// Template element with an HTML namespace and having to normalize namespace URI &
// local name
[ strtoupper(Parser::HTML_NAMESPACE), null, ' TEMPLATE ', 'template', HTMLTemplateElement::class ],
// SVG element with SVG namespace
[ Parser::SVG_NAMESPACE, Parser::SVG_NAMESPACE, 'svg', 'svg', Element::class ],
// SVG element with SVG namespace and having to normalize local name
[ Parser::SVG_NAMESPACE, Parser::SVG_NAMESPACE, ' SVG', 'SVG', Element::class ],
// SVG element with SVG namespace and having to normalize local name
[ strtoupper(Parser::SVG_NAMESPACE), Parser::SVG_NAMESPACE, ' SVG ', 'SVG', Element::class ]
];
}
/**
* @dataProvider provideElements
* @covers \MensBeam\HTML\DOM\Document::createElement
* @covers \MensBeam\HTML\DOM\Document::createElementNS
*/
public function testElementCreation(?string $nsIn, ?string $nsOut, string $localIn, string $localOut, string $class): void {
$d = new Document;
$n = ($nsIn !== '') ? $d->createElementNS($nsIn, $localIn) : $d->createElement($localIn);
$this->assertInstanceOf($class, $n);
$this->assertNotNull($n->ownerDocument);
$this->assertSame($nsOut, $n->namespaceURI);
$this->assertSame($localOut, $n->localName);
}
}

2
vendor-bin/phpunit/composer.lock

@ -2107,5 +2107,5 @@
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.1.0"
"plugin-api-version": "2.0.0"
}

2
vendor-bin/robo/composer.lock

@ -2003,5 +2003,5 @@
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.1.0"
"plugin-api-version": "2.0.0"
}

Loading…
Cancel
Save