Browse Source

Document changes

• Made Document::createAttribute and Document::createAttributeNS more in line with the DOM spec.
• Tests still fail because they're wrong
wrapper-classes
Dustin Wilson 3 years ago
parent
commit
875963e697
  1. 2
      composer.lock
  2. 152
      lib/Document.php
  3. 2
      lib/HTMLTemplateElement.php
  4. 2
      vendor-bin/phpunit/composer.lock
  5. 2
      vendor-bin/robo/composer.lock

2
composer.lock

@ -2479,5 +2479,5 @@
"ext-dom": "*"
},
"platform-dev": [],
"plugin-api-version": "2.0.0"
"plugin-api-version": "2.1.0"
}

152
lib/Document.php

@ -25,12 +25,16 @@ class Document extends AbstractDocument {
// List of elements that are treated as block elements for the purposes of
// output formatting when serializing
protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ];
// Regex used to validate names when creating elements.
protected const NAME_PRODUCTION_REGEX = '/^[:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/Su';
// List of h-elements used when determining extra spacing for the purposes of
// output formatting when serializing
protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ];
// List of preformatted elements where content is ignored for the purposes of
// output formatting when serializing
protected const PREFORMATTED_ELEMENTS = [ 'iframe', 'listing', 'noembed', 'noframes', 'noscript', 'plaintext', 'pre', 'style', 'script', 'textarea', 'title', 'xmp' ];
// Regex used to validate qualified names when creating namespaced elements.
protected const QNAME_PRODUCTION_REGEX = '/^([A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*:)?[A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/Su';
// List of elements which are self-closing; used when serializing
protected const VOID_ELEMENTS = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
@ -150,17 +154,44 @@ class Document extends AbstractDocument {
}
public function createAttribute($name) {
return $this->createAttributeNS(null, $name);
}
public function createAttribute($localName): \DOMAttr {
# The createAttribute(localName) method steps are:
# 1. If localName does not match the Name production in XML, then throw an
# "InvalidCharacterError" DOMException.
if (preg_match(self::NAME_PRODUCTION_REGEX, $localName) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
public function createAttributeNS($namespaceURI, $qualifiedName) {
// Normalize the attribute name and namespace URI per modern DOM specifications.
if ($namespaceURI !== null) {
$namespaceURI = trim($namespaceURI);
# 2. If this is an HTML document, then set localName to localName in ASCII
# lowercase.
// This will always be an HTML document
$localName = strtolower($localName);
# 3. Return a new attribute whose local name is localName and node document is
# this.
// We need to do a couple more things here. PHP's XML-based DOM doesn't allow
// some characters. We have to coerce them sometimes.
try {
return parent::createAttributeNS(null, $localName);
} catch (\DOMException $e) {
// The element name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
$this->mangledAttributes = true;
return parent::createAttributeNS(null, $this->coerceName($localName));
}
$qualifiedName = trim($qualifiedName);
}
public function createAttributeNS($namespaceURI, $qualifiedName): \DOMAttr {
# The createAttributeNS(namespace, qualifiedName) method steps are:
# 1. Let namespace, prefix, and localName be the result of passing namespace and
# qualifiedName to validate and extract.
[ 'namespace' => $namespaceURI, 'prefix' => $prefix, 'localName' => $localName ] = $this->validateAndExtract($qualifiedName, $namespaceURI);
# 2. Return a new attribute whose namespace is namespace, namespace prefix is
# prefix, local name is localName, and node document is this.
// We need to do a couple more things here. PHP's XML-based DOM doesn't allow
// some characters. We have to coerce them sometimes.
try {
return parent::createAttributeNS($namespaceURI, $qualifiedName);
} catch (\DOMException $e) {
@ -192,7 +223,7 @@ class Document extends AbstractDocument {
# 1. If localName does not match the Name production, then throw an
# "InvalidCharacterError" DOMException.
if (preg_match('/^[:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/u', $name) !== 1) {
if (preg_match(self::NAME_PRODUCTION_REGEX, $name) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
@ -211,7 +242,7 @@ class Document extends AbstractDocument {
try {
if ($name !== 'template') {
$e = parent::createElement($name);
$e = parent::createElementNS(null, $name);
} else {
$e = new HTMLTemplateElement($this, $name);
}
@ -221,7 +252,8 @@ class Document extends AbstractDocument {
// The element name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
return parent::createElement($this->coerceName($name));
$this->mangledElements = true;
return parent::createElementNS(null, $this->coerceName($name));
}
}
@ -242,52 +274,8 @@ class Document extends AbstractDocument {
# 1. Let namespace, prefix, and localName be the result of passing namespace and
# qualifiedName to validate and extract.
[ 'namespace' => $namespaceURI, 'prefix' => $prefix, 'localName' => $localName ] = $this->validateAndExtract($qualifiedName, $namespaceURI);
## To validate and extract a namespace and qualifiedName, run these steps:
## 1. If namespace is the empty string, set it to null.
if ($namespaceURI === '') {
$namespaceURI = null;
}
## 2. Validate qualifiedName.
### To validate a qualifiedName, throw an "InvalidCharacterError" DOMException if
### qualifiedName does not match the QName production.
if (preg_match('/^([A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*:)?[A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/u', $qualifiedName) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
## 3. Let prefix be null.
$prefix = null;
## 4. Let localName be qualifiedName.
$localName = $qualifiedName;
## 5. If qualifiedName contains a ":" (U+003E), then split the string on it and
## set prefix to the part before and localName to the part after.
if (strpos($qualifiedName, ':') !== false) {
$temp = explode(':', $qualifiedName, 2);
$prefix = $temp[0];
$prefix = ($prefix !== '') ? $prefix : null;
$localName = $temp[1];
}
## 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
## 7. If prefix is "xml" and namespace is not the XML namespace, then throw a "NamespaceError" DOMException.
## 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS
## namespace, then throw a "NamespaceError" DOMException.
## 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is
## "xmlns", then throw a "NamespaceError" DOMException.
if (
($prefix !== null && $namespaceURI === null) ||
($prefix === 'xml' && $namespaceURI !== Parser::XML_NAMESPACE) ||
(($qualifiedName === 'xmlns' || $prefix === 'xmlns') && $namespaceURI !== Parser::XMLNS_NAMESPACE) ||
($namespaceURI === Parser::XMLNS_NAMESPACE && $qualifiedName !== 'xmlns' && $prefix !== 'xmlns')
) {
throw new DOMException(DOMException::NAMESPACE_ERROR);
}
## 10. Return namespace, prefix, and localName.
// Right-o.
# 2. Let is be null.
# 3. If options is a dictionary and options["is"] exists, then set is to it.
@ -901,6 +889,58 @@ class Document extends AbstractDocument {
return $s;
}
protected function validateAndExtract(string $qualifiedName, ?string $namespace = null): array {
    // Implements the DOM "validate and extract" algorithm. Returns an array with
    // the keys 'namespace', 'prefix', and 'localName'; throws a DOMException
    // (INVALID_CHARACTER or NAMESPACE_ERROR) when validation fails.

    # To validate and extract a namespace and qualifiedName, run these steps:
    # 1. If namespace is the empty string, set it to null.
    $namespace = ($namespace === '') ? null : $namespace;

    # 2. Validate qualifiedName.
    #    To validate a qualifiedName, throw an "InvalidCharacterError" DOMException
    #    if qualifiedName does not match the QName production.
    $matchesQName = preg_match(self::QNAME_PRODUCTION_REGEX, $qualifiedName);
    if ($matchesQName !== 1) {
        throw new DOMException(DOMException::INVALID_CHARACTER);
    }

    # 3. Let prefix be null.
    # 4. Let localName be qualifiedName.
    # 5. If qualifiedName contains a ":" (U+003A), then split the string on it and
    #    set prefix to the part before and localName to the part after.
    // Splitting with a limit of 2 yields a single piece when there is no colon,
    // so the piece count tells us whether a prefix is present.
    $pieces = explode(':', $qualifiedName, 2);
    if (count($pieces) === 2) {
        $prefix = ($pieces[0] === '') ? null : $pieces[0];
        $localName = $pieces[1];
    } else {
        $prefix = null;
        $localName = $qualifiedName;
    }

    # 6. If prefix is non-null and namespace is null, then throw a
    #    "NamespaceError" DOMException.
    if ($prefix !== null && $namespace === null) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    # 7. If prefix is "xml" and namespace is not the XML namespace, then throw a
    #    "NamespaceError" DOMException.
    if ($prefix === 'xml' && $namespace !== Parser::XML_NAMESPACE) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    # 8. If either qualifiedName or prefix is "xmlns" and namespace is not the
    #    XMLNS namespace, then throw a "NamespaceError" DOMException.
    $claimsXmlns = ($qualifiedName === 'xmlns' || $prefix === 'xmlns');
    if ($claimsXmlns && $namespace !== Parser::XMLNS_NAMESPACE) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    # 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix
    #    is "xmlns", then throw a "NamespaceError" DOMException.
    if ($namespace === Parser::XMLNS_NAMESPACE && !$claimsXmlns) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    # 10. Return namespace, prefix, and localName.
    $result = [];
    $result['namespace'] = $namespace;
    $result['prefix'] = $prefix;
    $result['localName'] = $localName;

    return $result;
}
private function convertTemplate(\DOMElement $element): \DOMElement {
if ($element->namespaceURI === null && $element->nodeName === 'template') {

2
lib/HTMLTemplateElement.php

@ -12,7 +12,7 @@ namespace MensBeam\HTML\DOM;
class HTMLTemplateElement extends Element {
public $content = null;
public function __construct(Document $ownerDocument, string $qualifiedName, ?string $namespace = null) {
public function __construct(Document $ownerDocument, string $qualifiedName, ?string $namespace = '') {
parent::__construct($qualifiedName, null, $namespace);
// Elements that are created by their constructor in PHP aren't owned by any

2
vendor-bin/phpunit/composer.lock

@ -2107,5 +2107,5 @@
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.0.0"
"plugin-api-version": "2.1.0"
}

2
vendor-bin/robo/composer.lock

@ -2003,5 +2003,5 @@
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.0.0"
"plugin-api-version": "2.1.0"
}

Loading…
Cancel
Save