Sort out namespaced attributes

This commit is contained in:
J. King 2021-03-18 12:40:54 -04:00
parent 02add5633a
commit 82621a11e3
5 changed files with 100 additions and 61 deletions

View file

@ -68,7 +68,7 @@ class ParseError {
const UNEXPECTED_CHAR = 211;
const UNEXPECTED_EOF = 212;
const UNEXPECTED_PARENT = 213;
const UNEXPECTED_ATTRIBUTE_VALUE = 214;
const INVALID_NAMESPACE_ATTRIBUTE_VALUE = 214;
const FOSTERED_START_TAG = 215;
const FOSTERED_END_TAG = 216;
const FOSTERED_CHAR = 217;
@ -87,7 +87,7 @@ class ParseError {
self::UNEXPECTED_CHAR => 'Unexpected character data',
self::UNEXPECTED_EOF => 'Unexpected end of file',
self::UNEXPECTED_PARENT => 'Start tag <%s> not valid in parent <%s>',
self::UNEXPECTED_ATTRIBUTE_VALUE => 'Unexpected value in attribute "%s"',
self::INVALID_NAMESPACE_ATTRIBUTE_VALUE => 'Invalid value for attribute "%s"; it must have value "%s" or be omitted',
self::FOSTERED_START_TAG => 'Start tag <%s> moved to before table',
self::FOSTERED_END_TAG => 'End tag </%s> moved to before table',
self::FOSTERED_CHAR => 'Character moved to before table',

View file

@ -70,41 +70,34 @@ abstract class TagToken extends Token {
$this->name = $name;
}
public function getAttribute(string $name) {
$key = $this->_getAttributeKey($name);
public function hasAttribute(string $name): bool {
return (!is_null($this->_getAttributeKey($name)));
}
return (isset($this->attributes[$key])) ? $this->attributes[$key] : null;
}
public function getAttribute(string $name) {
$key = $this->_getAttributeKey($name);
return (isset($this->attributes[$key])) ? $this->attributes[$key] : null;
}
public function hasAttribute(string $name): bool {
return (!is_null($this->_getAttributeKey($name)));
}
public function setAttribute(string $name, string $value) {
$key = $this->_getAttributeKey($name);
if (is_null($key)) {
$this->attributes[] = new TokenAttr($name, $value);
} else {
$attribute = &$this->attributes[$key];
$attribute->name = $name;
$attribute->value = $value;
}
}
public function removeAttribute(string $name) {
unset($this->attributes[$this->_getAttributeKey($name)]);
}
public function setAttribute(string $name, string $value) {
$key = $this->_getAttributeKey($name);
if (is_null($key)) {
$this->attributes[] = new TokenAttr($name, $value);
} else {
$attribute = &$this->attributes[$key];
$attribute->name = $name;
$attribute->value = $value;
}
}
private function _getAttributeKey(string $name) {
foreach ($this->attributes as $key => $a) {
if ($a->name === $name) {
return $key;
}
}
return null;
}
private function _getAttributeKey(string $name) {
foreach ($this->attributes as $key => $a) {
if ($a->name === $name) {
return $key;
}
}
return null;
}
}
class StartTagToken extends TagToken {
@ -120,8 +113,12 @@ class EOFToken extends Token {
}
class TokenAttr {
/** @var string The name of the attribute */
public $name;
/** @var string The attribute's value */
public $value;
/** @var string|null The attribute's namespace. This is normally null but may be set during tree construction */
public $namespace = null;
public function __construct(string $name, string $value) {
$this->name = $name;

View file

@ -192,11 +192,11 @@ class TreeBuilder {
'xlink:show' => Parser::XLINK_NAMESPACE,
'xlink:title' => Parser::XLINK_NAMESPACE,
'xlink:type' => Parser::XLINK_NAMESPACE,
'xml:base' => Parser::XML_NAMESPACE,
'xml:id' => Parser::XML_NAMESPACE, // DEVIATION
'xml:lang' => Parser::XML_NAMESPACE,
'xml:space' => Parser::XML_NAMESPACE,
'xmlns' => Parser::XMLNS_NAMESPACE,
'xmlns:xlink' => Parser::XLINK_NAMESPACE,
'xmlns:xlink' => Parser::XMLNS_NAMESPACE,
];
# The following elements have varying levels of special parsing rules: HTML's
# address, applet, area, article, aside, base, basefont, bgsound, blockquote,
@ -4277,29 +4277,21 @@ class TreeBuilder {
$element = $document->createElementNS($namespace, $localName);
# Append each attribute in the given token to element.
foreach ($token->attributes as $attr) {
$ns = null;
if ($namespace) {
// Determine the namespace URI for the prefix, if any
if (strpos($attr->name, "xml:") === 0) {
$ns = Parser::XML_NAMESPACE;
} elseif (strpos($attr->name, "xmlns:") === 0) {
$ns = Parser::XMLNS_NAMESPACE;
} elseif (strpos($attr->name, "xlink:") === 0) {
$ns = Parser::XLINK_NAMESPACE;
}
# If element has an xmlns attribute in the XMLNS namespace whose value
# is not exactly the same as the element's namespace, that is a
# parse error. Similarly, if element has an xmlns:xlink attribute in
# the XMLNS namespace whose value is not the XLink Namespace, that
# is a parse error.
// NOTE: The specification is silent as to how to handle these
// attributes. We assume these bad attributes should be dropped,
// since they break the DOM when added
if ($attr->name === "xmlns" && $namespace !== null && $attr->value !== $namespace) {
$this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns", $namespace);
} elseif ($attr->name === "xmlns:xlink" && $namespace !== null && $attr->value !== Parser::XLINK_NAMESPACE) {
$this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns:xlink", Parser::XLINK_NAMESPACE);
} else {
$element->setAttributeNS($attr->namespace, $attr->name, $attr->value);
}
$element->setAttributeNS($ns, $attr->name, $attr->value);
}
# If element has an xmlns attribute in the XMLNS namespace whose value
# is not exactly the same as the element's namespace, that is a
# parse error. Similarly, if element has an xmlns:xlink attribute in
# the XMLNS namespace whose value is not the XLink Namespace, that
# is a parse error.
if ($element->hasAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns") && $element->getAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns") !== $element->namespaceURI) {
$this->error(ParseError::UNEXPECTED_ATTRIBUTE_VALUE, "xmlns");
}
if ($element->hasAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns:link") && $element->getAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns:xlink") !== Parser::XLINK_NAMESPACE) {
$this->error(ParseError::UNEXPECTED_ATTRIBUTE_VALUE, "xmlns:xlink");
}
# Return element.
return $element;

View file

@ -76,9 +76,6 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
// run the tree builder
try {
$treeBuilder->constructTree();
} catch (\DOMException $e) {
$this->markTestIncomplete('Requires implementation of the "Coercing an HTML DOM into an infoset" specification section');
return;
} catch (LoopException $e) {
$act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp);
$this->assertEquals($exp, $act, $e->getMessage()."\n".$treeBuilder->debugLog);

View file

@ -0,0 +1,53 @@
#data
<!DOCTYPE html><svg xmlns="http://www.w3.org/2000/svg"/>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg xmlns="http://www.w3.org/1999/xlink"/>
#errors
(1,58): invalid-namespace-attribute-value
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg xmlns:xlink="http://www.w3.org/1999/xlink"/>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg xlink:href="http://example.com/"/>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| xlink href="http://example.com/"
#data
<!DOCTYPE html><svg xmlns:xlink="http://www.w3.org/1999/xhtml" xlink:href="http://example.com/"/>
#errors
(1,97): invalid-namespace-attribute-value
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| xlink href="http://example.com/"