diff --git a/lib/ParseError.php b/lib/ParseError.php index 0e6d24d..0abec4a 100644 --- a/lib/ParseError.php +++ b/lib/ParseError.php @@ -68,7 +68,7 @@ class ParseError { const UNEXPECTED_CHAR = 211; const UNEXPECTED_EOF = 212; const UNEXPECTED_PARENT = 213; - const UNEXPECTED_ATTRIBUTE_VALUE = 214; + const INVALID_NAMESPACE_ATTRIBUTE_VALUE = 214; const FOSTERED_START_TAG = 215; const FOSTERED_END_TAG = 216; const FOSTERED_CHAR = 217; @@ -87,7 +87,7 @@ class ParseError { self::UNEXPECTED_CHAR => 'Unexpected character data', self::UNEXPECTED_EOF => 'Unexpected end of file', self::UNEXPECTED_PARENT => 'Start tag <%s> not valid in parent <%s>', - self::UNEXPECTED_ATTRIBUTE_VALUE => 'Unexpected value in attribute "%s"', + self::INVALID_NAMESPACE_ATTRIBUTE_VALUE => 'Invalid value for attribute "%s"; it must have value "%s" or be omitted', self::FOSTERED_START_TAG => 'Start tag <%s> moved to before table', self::FOSTERED_END_TAG => 'End tag %s> moved to before table', self::FOSTERED_CHAR => 'Character moved to before table', diff --git a/lib/Token.php b/lib/Token.php index 26e6ce1..6c5f656 100644 --- a/lib/Token.php +++ b/lib/Token.php @@ -70,41 +70,34 @@ abstract class TagToken extends Token { $this->name = $name; } - public function getAttribute(string $name) { - $key = $this->_getAttributeKey($name); + public function hasAttribute(string $name): bool { + return (!is_null($this->_getAttributeKey($name))); + } - return (isset($this->attributes[$key])) ? $this->attributes[$key] : null; - } + public function getAttribute(string $name) { + $key = $this->_getAttributeKey($name); + return (isset($this->attributes[$key])) ? $this->attributes[$key] : null; + } - public function hasAttribute(string $name): bool { - return (!is_null($this->_getAttributeKey($name))); - } + public function setAttribute(string $name, string $value) { + $key = $this->_getAttributeKey($name); + if (is_null($key)) { + $this->attributes[] = new TokenAttr($name, $value); + } else { + $attribute = &$this->attributes[$key]; + $attribute->name = $name; + $attribute->value = $value; + } + } - public function removeAttribute(string $name) { - unset($this->attributes[$this->_getAttributeKey($name)]); - } - - public function setAttribute(string $name, string $value) { - $key = $this->_getAttributeKey($name); - - if (is_null($key)) { - $this->attributes[] = new TokenAttr($name, $value); - } else { - $attribute = &$this->attributes[$key]; - $attribute->name = $name; - $attribute->value = $value; - } - } - - private function _getAttributeKey(string $name) { - foreach ($this->attributes as $key => $a) { - if ($a->name === $name) { - return $key; - } - } - - return null; - } + private function _getAttributeKey(string $name) { + foreach ($this->attributes as $key => $a) { + if ($a->name === $name) { + return $key; + } + } + return null; + } } class StartTagToken extends TagToken { @@ -120,8 +113,12 @@ class EOFToken extends Token { } class TokenAttr { + /** @var string The name of the attribute */ public $name; + /** @var string The attribute's value */ public $value; + /** @var string|null The attribute's namespace. This is normally null but may be set during tree construction */ + public $namespace = null; public function __construct(string $name, string $value) { $this->name = $name; diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php index d5b4269..80a203e 100644 --- a/lib/TreeBuilder.php +++ b/lib/TreeBuilder.php @@ -192,11 +192,11 @@ class TreeBuilder { 'xlink:show' => Parser::XLINK_NAMESPACE, 'xlink:title' => Parser::XLINK_NAMESPACE, 'xlink:type' => Parser::XLINK_NAMESPACE, - 'xml:base' => Parser::XML_NAMESPACE, + 'xml:id' => Parser::XML_NAMESPACE, // DEVIATION 'xml:lang' => Parser::XML_NAMESPACE, 'xml:space' => Parser::XML_NAMESPACE, 'xmlns' => Parser::XMLNS_NAMESPACE, - 'xmlns:xlink' => Parser::XLINK_NAMESPACE, + 'xmlns:xlink' => Parser::XMLNS_NAMESPACE, ]; # The following elements have varying levels of special parsing rules: HTML’s # address, applet, area, article, aside, base, basefont, bgsound, blockquote, @@ -4277,29 +4277,21 @@ class TreeBuilder { $element = $document->createElementNS($namespace, $localName); # Append each attribute in the given token to element. foreach ($token->attributes as $attr) { - $ns = null; - if ($namespace) { - // Determine the namespace URI for the prefix, if any - if (strpos($attr->name, "xml:") === 0) { - $ns = Parser::XML_NAMESPACE; - } elseif (strpos($attr->name, "xmlns:") === 0) { - $ns = Parser::XMLNS_NAMESPACE; - } elseif (strpos($attr->name, "xlink:") === 0) { - $ns = Parser::XLINK_NAMESPACE; - } + # If element has an xmlns attribute in the XMLNS namespace whose value + # is not exactly the same as the element's namespace, that is a + # parse error. Similarly, if element has an xmlns:xlink attribute in + # the XMLNS namespace whose value is not the XLink Namespace, that + # is a parse error. + // NOTE: The specification is silent as to how to handle these + // attributes. We assume these bad attributes should be dropped, + // since they break the DOM when added + if ($attr->name === "xmlns" && $namespace !== null && $attr->value !== $namespace) { + $this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns", $namespace); + } elseif ($attr->name === "xmlns:xlink" && $namespace !== null && $attr->value !== Parser::XLINK_NAMESPACE) { + $this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns:xlink", Parser::XLINK_NAMESPACE); + } else { + $element->setAttributeNS($attr->namespace, $attr->name, $attr->value); } - $element->setAttributeNS($ns, $attr->name, $attr->value); - } - # If element has an xmlns attribute in the XMLNS namespace whose value - # is not exactly the same as the element's namespace, that is a - # parse error. Similarly, if element has an xmlns:xlink attribute in - # the XMLNS namespace whose value is not the XLink Namespace, that - # is a parse error. - if ($element->hasAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns") && $element->getAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns") !== $element->namespaceURI) { - $this->error(ParseError::UNEXPECTED_ATTRIBUTE_VALUE, "xmlns"); - } - if ($element->hasAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns:link") && $element->getAttributeNS(Parser::XMLNS_NAMESPACE, "xmlns:xlink") !== Parser::XLINK_NAMESPACE) { - $this->error(ParseError::UNEXPECTED_ATTRIBUTE_VALUE, "xmlns:xlink"); } # Return element. return $element; diff --git a/tests/cases/TestTreeConstructor.php b/tests/cases/TestTreeConstructor.php index eeb974b..873e426 100644 --- a/tests/cases/TestTreeConstructor.php +++ b/tests/cases/TestTreeConstructor.php @@ -76,9 +76,6 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase { // run the tree builder try { $treeBuilder->constructTree(); - } catch (\DOMException $e) { - $this->markTestIncomplete('Requires implementation of the "Coercing an HTML DOM into an infoset" specification section'); - return; } catch (LoopException $e) { $act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp); $this->assertEquals($exp, $act, $e->getMessage()."\n".$treeBuilder->debugLog); diff --git a/tests/cases/tree-construction/mensbeam01.dat b/tests/cases/tree-construction/mensbeam01.dat new file mode 100644 index 0000000..5e1de3c --- /dev/null +++ b/tests/cases/tree-construction/mensbeam01.dat @@ -0,0 +1,53 @@ +#data + +#errors +#document +| +| +|
+| +|