Browse Source

Parse to an unmodified DOM

ns
J. King 3 years ago
parent
commit
88af223a89
  1. 13
      lib/ActiveFormattingElementsList.php
  2. 2
      lib/Charset.php
  3. 2
      lib/Exception.php
  4. 57
      lib/NameCoercion.php
  5. 23
      lib/OpenElementsStack.php
  6. 28
      lib/Parser.php
  7. 109
      lib/TreeBuilder.php
  8. 14
      tests/cases/TestTreeConstructor.php

13
lib/ActiveFormattingElementsList.php

@ -30,7 +30,7 @@ class ActiveFormattingElementsList extends Stack {
&& isset($value['token'])
&& isset($value['element'])
&& $value['token'] instanceof StartTagToken
&& $value['element'] instanceof Element
&& $value['element'] instanceof \DOMElement
), new Exception(Exception::STACK_INVALID_VALUE));
if ($value instanceof ActiveFormattingElementsMarker) {
$this->_storage[$offset ?? $count] = $value;
@ -69,7 +69,7 @@ class ActiveFormattingElementsList extends Stack {
$this->count = count($this->_storage);
}
protected function matchElement(Element $a, Element $b): bool {
protected function matchElement(\DOMElement $a, \DOMElement $b): bool {
// Compare elements as part of pushing an element onto the stack
# 1. If there are already three elements in the list of active formatting
# elements after the last marker, if any, or anywhere in the list if there are
@ -96,7 +96,7 @@ class ActiveFormattingElementsList extends Stack {
return true;
}
public function insert(StartTagToken $token, Element $element, ?int $at = null): void {
public function insert(StartTagToken $token, \DOMElement $element, ?int $at = null): void {
assert($at === null || ($at >= 0 && $at <= $this->count), new Exception(Exception::STACK_INVALID_INDEX, $at));
if ($at === null) {
$this[] = [
@ -134,7 +134,7 @@ class ActiveFormattingElementsList extends Stack {
$this->count = count($this->_storage);
}
public function findSame(Element $target): int {
public function findSame(\DOMElement $target): int {
foreach ($this as $k => $entry) {
if (!$entry instanceof ActiveFormattingElementsMarker && $entry['element']->isSameNode($target)) {
return $k;
@ -155,7 +155,7 @@ class ActiveFormattingElementsList extends Stack {
return -1;
}
public function removeSame(Element $target): void {
public function removeSame(\DOMElement $target): void {
$pos = $this->findSame($target);
if ($pos > -1) {
unset($this[$pos]);
@ -180,4 +180,5 @@ class ActiveFormattingElementsList extends Stack {
}
}
class ActiveFormattingElementsMarker {}
class ActiveFormattingElementsMarker {
}

2
lib/Charset.php

@ -10,7 +10,7 @@ use MensBeam\Intl\Encoding;
use MensBeam\Mime\MimeType;
abstract class Charset {
/** Finds a Unicode byte order mark by a byte stream
/** Finds a Unicode byte order mark in a byte stream
* and returns the detected encoding, if any */
public static function fromBOM(string $data): ?string {
if (substr($data, 0, 3) === "\u{FEFF}") {

2
lib/Exception.php

@ -13,6 +13,7 @@ class Exception extends \Exception {
const UNREACHABLE_CODE = 103;
const PARSER_NONEMPTY_DOCUMENT = 201;
const INVALID_QUIRKS_MODE = 202;
const STACK_INVALID_INDEX = 301;
const STACK_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 302;
@ -45,6 +46,7 @@ class Exception extends \Exception {
103 => 'Unreachable code',
201 => 'Non-empty Document supplied as argument for Parser',
202 => 'Fragment\'s quirks mode must be one of Parser::NO_QUIRKS_MODE, Parser::LIMITED_QUIRKS_MODE, or Parser::QUIRKS_MODE',
301 => 'Invalid Stack index at %s',
302 => 'Element, Document, or DOMDocumentFragment expected for fragment context',

57
lib/NameCoercion.php

@ -0,0 +1,57 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
use MensBeam\Intl\Encoding\UTF8;
/**
 * Helpers for mapping HTML element and attribute names to and from names
 * which are acceptable to an XML-based DOM, and for escaping strings as
 * part of HTML serialization.
 *
 * Characters which are not valid in an XML name are represented as
 * "UHHHHHH", where "HHHHHH" is the character's code point as six uppercase
 * hexadecimal digits.
 */
trait NameCoercion {
    /**
     * Replaces every character of a name which is not allowed in an XML name
     * with its "UHHHHHH" escape.
     *
     * @param string $name The name to coerce; the colon is always treated as invalid, so qualified names must be split by the caller
     * @return string The coerced name; the input is returned unchanged if it cannot be processed by the regular expression engine
     */
    protected function coerceName(string $name): string {
        // Builds the "UHHHHHH" escape for the single character matched
        $escape = static function(array $match): string {
            $code = (new UTF8($match[0]))->nextCode();
            return "U".str_pad(strtoupper(dechex($code)), 6, "0", \STR_PAD_LEFT);
        };
        // This matches the inverse of the production of NameChar in XML 1.0,
        //   with the added exclusion of ":" from allowed characters
        // See https://www.w3.org/TR/REC-xml/#NT-NameStartChar
        // Escapes consist only of "U" and hexadecimal digits, which are
        //   themselves valid name characters, so one pass is sufficient
        $name = preg_replace_callback('/[^\-\.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $escape, $name) ?? $name;
        // Apply stricter rules to the first character: characters such as
        //   digits, "-", and "." are valid within a name but may not start one
        $name = preg_replace_callback('/^[^A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $escape, $name) ?? $name;
        return $name;
    }

    /**
     * Reverses coerceName() by replacing each "UHHHHHH" sequence with the
     * character it denotes.
     *
     * The name is decoded in a single left-to-right pass so that a character
     * produced by decoding one sequence can never combine with neighbouring
     * text to be decoded a second time.
     *
     * NOTE: Any literal text which happens to look like an escape sequence is
     *   decoded as well; the encoding is inherently ambiguous in this respect.
     *
     * @param string $name The coerced name
     * @return string The name with all escape sequences decoded
     */
    protected function uncoerceName(string $name): string {
        return preg_replace_callback('/U[0-9A-F]{6}/', static function(array $match): string {
            // Drop the leading "U" and interpret the remainder as a hexadecimal code point
            return UTF8::encode(hexdec(substr($match[0], 1)));
        }, $name) ?? $name;
    }

    /**
     * Escapes a string for inclusion in serialized HTML.
     *
     * @param string $string The string to escape
     * @param bool $attribute Whether to escape for an attribute value (true) or for text content (false)
     * @return string The escaped string
     */
    protected function escapeString(string $string, bool $attribute = false): string {
        # Escaping a string (for the purposes of the algorithm above) consists of
        # running the following steps:

        # 1. Replace any occurrence of the "&" character by the string "&amp;".
        # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
        # string "&nbsp;".
        $string = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string);
        # 3. If the algorithm was invoked in the attribute mode, replace any
        # occurrences of the """ character by the string "&quot;".
        if ($attribute) {
            return str_replace('"', '&quot;', $string);
        }
        # 4. If the algorithm was not invoked in the attribute mode, replace any
        # occurrences of the "<" character by the string "&lt;", and any
        # occurrences of the ">" character by the string "&gt;".
        return str_replace(['<', '>'], ['&lt;', '&gt;'], $string);
    }
}

23
lib/OpenElementsStack.php

@ -89,27 +89,22 @@ class OpenElementsStack extends Stack {
],
];
/** @var ?\MensBeam\HTML\Element */
/** @var ?\DOMElement */
protected $fragmentContext = null;
/** @var ?\MensBeam\HTML\Element */
/** @var ?\DOMElement */
public $currentNode = null;
/** @var ?string */
public $currentNodeName = null;
/** @var ?string */
public $currentNodeNamespace = null;
/** @var ?\MensBeam\HTML\Element */
/** @var ?\DOMElement */
public $adjustedCurrentNode = null;
/** @var ?string */
public $adjustedCurrentNodeName = null;
/** @var ?string */
public $adjustedCurrentNodeNamespace = null;
public function __construct(?Element $fragmentContext = null) {
// If the fragment context is not null and is not a document fragment, document,
// or element then we have a problem. Additionally, if the parser is created for
// parsing a fragment and the fragment context is null then we have a problem,
// too.
assert($fragmentContext === null || $fragmentContext instanceof \DOMDocumentFragment || $fragmentContext instanceof \DOMDocument || $fragmentContext instanceof \DOMElement,new Exception(Exception::STACK_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED));
public function __construct(?\DOMElement $fragmentContext = null) {
$this->fragmentContext = $fragmentContext;
}
@ -136,7 +131,7 @@ class OpenElementsStack extends Stack {
$this->computeProperties();
}
public function insert(Element $element, ?int $at = null): void {
public function insert(\DOMElement $element, ?int $at = null): void {
assert($at === null || ($at >= 0 && $at <= count($this->_storage)), new Exception(Exception::STACK_INVALID_INDEX, $at));
if ($at === null) {
$this[] = $element; // @codeCoverageIgnore
@ -154,7 +149,7 @@ class OpenElementsStack extends Stack {
$this->computeProperties();
}
public function popUntilSame(Element $target): void {
public function popUntilSame(\DOMElement $target): void {
do {
$node = array_pop($this->_storage);
} while (!$node->isSameNode($target));
@ -179,7 +174,7 @@ class OpenElementsStack extends Stack {
return -1;
}
public function findSame(Element $target): int {
public function findSame(\DOMElement $target): int {
for ($k = (sizeof($this->_storage) - 1); $k > -1; $k--) {
if ($this->_storage[$k]->isSameNode($target)) {
return $k;
@ -188,7 +183,7 @@ class OpenElementsStack extends Stack {
return -1;
}
public function removeSame(Element $target): void {
public function removeSame(\DOMElement $target): void {
$pos = $this->findSame($target);
if ($pos > -1) {
unset($this[$pos]);
@ -313,7 +308,7 @@ class OpenElementsStack extends Stack {
foreach ($this as $node) {
# If node is the target node, terminate in a match state.
foreach ($targets as $target) {
if ($target instanceof Element) {
if ($target instanceof \DOMElement) {
if ($node->isSameNode($target)) {
return true;
}

28
lib/Parser.php

@ -9,15 +9,19 @@ namespace MensBeam\HTML;
class Parser {
public static $fallbackEncoding = "windows-1252";
public const NO_QUIRKS_MODE = 0;
public const QUIRKS_MODE = 1;
public const LIMITED_QUIRKS_MODE = 2;
// Namespace constants
const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
const XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink';
const XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
const XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
public const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
public const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
public const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
public const XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink';
public const XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
public const XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
const NAMESPACE_MAP = [
public const NAMESPACE_MAP = [
self::HTML_NAMESPACE => "",
self::MATHML_NAMESPACE => "math",
self::SVG_NAMESPACE => "svg",
@ -26,9 +30,9 @@ class Parser {
self::XMLNS_NAMESPACE => "xmlns",
];
public static function parse(string $data, ?Document $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): Document {
public static function parse(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): Document {
// Initialize the various classes needed for parsing
$document = $document ?? new Document;
$document = $document ?? new \DOMDocument;
if ((error_reporting() & \E_USER_WARNING)) {
$errorHandler = new ParseError;
} else {
@ -52,10 +56,10 @@ class Parser {
return $document;
}
public static function parseFragment(string $data, ?Document $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): DocumentFragment {
public static function parseFragment(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): DocumentFragment {
// Create the requisite parsing context if none was supplied
$document = $document ?? new Document;
$tempDocument = new Document;
$document = $document ?? new \DOMDocument;
$tempDocument = new \DOMDocument;
$fragmentContext = $fragmentContext ?? $document->createElement("div");
// parse the fragment into the temporary document
self::parse($data, $tempDocument, $encodingOrContentType, $fragmentContext, $file);

109
lib/TreeBuilder.php

@ -7,13 +7,13 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class TreeBuilder {
use ParseErrorEmitter, EscapeString;
use ParseErrorEmitter, NameCoercion;
public $debugLog = "";
/** @var \MensBeam\HTML\ActiveFormattingElementsList The list of active formatting elements, used when elements are improperly nested */
protected $activeFormattingElementsList;
/** @var \MensBeam\HTML\Document The DOMDocument that is assembled by this class */
/** @var \DOMDocument The DOMDocument that is assembled by this class */
protected $DOM;
/** @var ?\DOMElement The form element pointer points to the last form element that was opened and whose end tag has not yet been seen. It is used to make form controls associate with forms in the face of dramatically bad markup, for historical reasons. It is ignored inside template elements */
protected $formElement;
@ -39,6 +39,12 @@ class TreeBuilder {
protected $templateInsertionModes;
/** @var array An array holding character tokens which may need to be foster-parented during table parsing */
protected $pendingTableCharacterTokens = [];
/** @var bool Flag used to track whether name mangling has been performed for elements; this is a minor optimization */
protected $mangledElements = false;
/** @var bool Flag used to track whether name mangling has been performed for attributes; this is a minor optimization */
protected $mangledAttributes = false;
/** @var int The quirks-mode setting of the document being parsed */
protected $quirksMode = Parser::NO_QUIRKS_MODE;
// Constants used for insertion modes
protected const INITIAL_MODE = 0;
@ -244,8 +250,12 @@ class TreeBuilder {
"frameset" => self::IN_FRAMESET_MODE,
];
public function __construct(Document $dom, Data $data, Tokenizer $tokenizer, \Generator $tokenList, ParseError $errorHandler, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, ?\DOMElement $fragmentContext = null) {
assert(!$dom->hasChildNodes() && !$dom->doctype, new Exception(Exception::TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT));
public function __construct(\DOMDocument $dom, Data $data, Tokenizer $tokenizer, \Generator $tokenList, ParseError $errorHandler, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, ?\DOMElement $fragmentContext = null, ?bool $fragmentQuirks = null) {
if ($dom->hasChildNodes() || $dom->doctype) {
throw new Exception(Exception::TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT);
} elseif (!in_array($fragmentQuirks ?? Parser::NO_QUIRKS_MODE, [Parser::NO_QUIRKS_MODE, Parser::LIMITED_QUIRKS_MODE, Parser::QUIRKS_MODE])) {
throw new Exception(Exception::INVALID_QUIRKS_MODE);
}
$this->DOM = $dom;
$this->fragmentContext = $fragmentContext;
$this->stack = $stack;
@ -264,7 +274,7 @@ class TreeBuilder {
# let the Document be in quirks mode. Otherwise, the node document of
# the context element is in limited-quirks mode, then let the Document
# be in limited-quirks mode. Otherwise, leave the Document in no-quirks mode.
$dom->quirksMode = $fragmentContext->ownerDocument->quirksMode;
$this->quirksMode = $fragmentQuirks ?? $this->quirksMode;
# Create a new HTML parser, and associate it with the just created Document node.
// Already done.
# Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
@ -315,7 +325,7 @@ class TreeBuilder {
// If element name coercion has occurred at some earlier point,
// we must coerce all end tag names to match mangled start tags
if ($token instanceof EndTagToken && $this->DOM->mangledElements) {
if ($token instanceof EndTagToken && $this->mangledElements) {
$token->name = $this->coerceName($token->name);
}
@ -381,8 +391,9 @@ class TreeBuilder {
// If attribute name coercion has occurred at some earlier point,
// we must coerce all attributes on html and body start tags in
// case they are relocated to existing elements
if (!$top->hasAttributeNS(null, $this->DOM->mangledAttributes ? $this->coerceName($a->name) : $a->name)) {
$top->setAttributeNS(null, $a->name, $a->value);
$attrName = $this->mangledAttributes ? $this->coerceName($a->name) : $a->name;
if (!$top->hasAttributeNS(null, $attrName)) {
$this->elementSetAttribute($top, null, $attrName, $a->value);
}
}
}
@ -412,8 +423,9 @@ class TreeBuilder {
// If attribute name coercion has occurred at some earlier point,
// we must coerce all attributes on html and body start tags in
// case they are relocated to existing elements
if (!$body->hasAttributeNS(null, $this->DOM->mangledAttributes ? $this->coerceName($a->name) : $a->name)) {
$body->setAttributeNS(null, $a->name, $a->value);
$attrName = $this->mangledAttributes ? $this->coerceName($a->name) : $a->name;
if (!$body->hasAttributeNS(null, $attrName)) {
$this->elementSetAttribute($body, null, $attrName, $a->value);
}
}
}
@ -706,7 +718,7 @@ class TreeBuilder {
# A start tag whose tag name is "table"
elseif ($token->name === "table") {
# If the Document is not set to quirks mode, and the stack of open elements has a p element in button scope, then close a p element.
if ($this->DOM->quirksMode !== Document::QUIRKS_MODE && $this->stack->hasElementInButtonScope("p")) {
if ($this->quirksMode !== Parser::QUIRKS_MODE && $this->stack->hasElementInButtonScope("p")) {
$this->closePElement($token);
}
# Insert an HTML element for the token.
@ -1386,7 +1398,7 @@ class TreeBuilder {
|| ($token->system === null && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0)
|| ($token->system === null && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0)
) {
$this->DOM->quirksMode = Document::QUIRKS_MODE;
$this->quirksMode = Parser::QUIRKS_MODE;
}
# Otherwise, if the document is not an iframe srcdoc document, and the DOCTYPE
# token matches one of the conditions in the following list, then set the
@ -1399,7 +1411,7 @@ class TreeBuilder {
|| ($token->system !== null && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0)
|| ($token->system !== null && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0)
) {
$this->DOM->quirksMode = Document::LIMITED_QUIRKS_MODE;
$this->quirksMode = Parser::LIMITED_QUIRKS_MODE;
}
# The system identifier and public identifier strings must be compared to the
# values given in the lists above in an ASCII case-insensitive manner. A system
@ -1426,7 +1438,7 @@ class TreeBuilder {
$this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_EOF);
}
$this->DOM->quirksMode = Document::QUIRKS_MODE;
$this->quirksMode = Parser::QUIRKS_MODE;
# In any case, switch the insertion mode to "before html", then reprocess the
# token.
@ -3832,7 +3844,8 @@ class TreeBuilder {
# template’s template contents, after its last child (if any), and abort these
# substeps.
if ($lastTemplate && (!$lastTable || ($lastTemplateIndex > $lastTableIndex))) {
$insertionLocation = $lastTemplate->content;
// DEVIATION: We don't implement template contents in the parser itself
$insertionLocation = $lastTemplate;
// Abort!
}
# 4. If there is no last table, then let adjusted insertion location be inside
@ -3868,7 +3881,8 @@ class TreeBuilder {
# instead be inside the template element’s template contents, after its last
# child (if any).
if ($insertionLocation instanceof Element && $insertionLocation->nodeName === 'template' && $insertionLocation->namespaceURI === null) {
$insertionLocation = $insertionLocation->content;
// DEVIATION: We don't implement template contents in the parser itself
$insertionLocation = $insertionLocation;
}
# 4. Return the adjusted insertion location.
return [
@ -3936,7 +3950,7 @@ class TreeBuilder {
$position->appendChild($this->DOM->createComment($token->data));
}
public function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): Element {
public function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): \DOMElement {
# When the steps below require the user agent to insert a foreign
# element for a token in a given namespace, the user agent must
# run these steps:
@ -4135,19 +4149,32 @@ class TreeBuilder {
return $this->insertionMode = self::IN_ROW_MODE;
}
protected function isElementSpecial(Element $element): bool {
/**
 * Reports whether an element is listed in SPECIAL_ELEMENTS for its namespace.
 *
 * @param \DOMElement $element The element to check
 * @return bool True if the element's node name is listed for its namespace
 */
protected function isElementSpecial(\DOMElement $element): bool {
    // Elements without a namespace URI are looked up under the HTML namespace
    $namespace = $element->namespaceURI ?? Parser::HTML_NAMESPACE;
    // Namespaces with no entry have no special elements
    $candidates = self::SPECIAL_ELEMENTS[$namespace] ?? [];
    return in_array($element->nodeName, $candidates);
}
protected function createElementForToken(TagToken $token, ?string $namespace = null, ?\DOMNode $intendedParent = null): Element {
protected function createElementForToken(TagToken $token, ?string $namespace = null, ?\DOMNode $intendedParent = null): \DOMElement {
// DEVIATION: Steps related to scripting have been elided entirely
# Let document be intended parent's node document.
# Let local name be the tag name of the token.
# Let element be the result of creating an element given document,
# localName, given namespace, null, and is.
$element = $this->DOM->createElementNS($namespace, $token->name);
try {
$element = $this->DOM->createElementNS($namespace, $token->name);
} catch (\DOMException $e) {
// The element name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
if ($namespace !== null) {
$qualifiedName = implode(":", array_map([$this, "coerceName"], explode(":", $token->name, 2)));
} else {
$qualifiedName = $this->coerceName($token->name);
}
$element = $this->DOM->createElementNS($namespace, $qualifiedName);
$this->mangledElements = true;
}
# Append each attribute in the given token to element.
foreach ($token->attributes as $attr) {
# If element has an xmlns attribute in the XMLNS namespace whose value
@ -4163,18 +4190,54 @@ class TreeBuilder {
} elseif ($attr->name === "xmlns:xlink" && $namespace !== null && $attr->value !== Parser::XLINK_NAMESPACE) {
$this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns:xlink", Parser::XLINK_NAMESPACE);
} else {
$element->setAttributeNS($attr->namespace, $attr->name, $attr->value);
$this->elementSetAttribute($element, $attr->namespace, $attr->name, $attr->value);
}
}
# Return element.
return $element;
}
public function isMathMLTextIntegrationPoint(Element $e): bool {
/**
 * Sets an attribute on an element, coercing the attribute name if the DOM
 * rejects it as invalid.
 *
 * Attributes in the XMLNS namespace are added as explicitly created
 * attribute nodes; all other attributes are set directly. If name coercion
 * was required, the mangledAttributes flag of this object is set.
 *
 * @param \DOMElement $element The element on which to set the attribute
 * @param ?string $namespaceURI The namespace of the attribute, or null for none
 * @param string $qualifiedName The name of the attribute, including any prefix
 * @param string $value The value of the attribute
 */
public function elementSetAttribute(\DOMElement $element, ?string $namespaceURI, string $qualifiedName, string $value): void {
    if ($namespaceURI === Parser::XMLNS_NAMESPACE) {
        // NOTE: We create attribute nodes so that xmlns attributes
        //   don't get lost; otherwise they cannot be serialized
        $a = @$element->ownerDocument->createAttributeNS($namespaceURI, $qualifiedName);
        if ($a === false) {
            // The document element does not exist yet, so we need
            //   to insert this element into the document
            $element->ownerDocument->appendChild($element);
            $a = $element->ownerDocument->createAttributeNS($namespaceURI, $qualifiedName);
            $element->ownerDocument->removeChild($element);
        }
        // NOTE(review): the value is escaped in attribute mode before being
        //   assigned, presumably because the DOM interprets entities when
        //   setting an attribute node's value -- confirm
        $a->value = $this->escapeString($value, true);
        $element->setAttributeNodeNS($a);
    } else {
        try {
            $element->setAttributeNS($namespaceURI, $qualifiedName, $value);
        } catch (\DOMException $e) {
            // The attribute name is invalid for XML
            // Replace any offending characters with "UHHHHHH" where H are the
            //   uppercase hexadecimal digits of the character's code point
            if ($namespaceURI !== null) {
                // Coerce the prefix and local name separately so that the
                //   colon separating them is preserved; coerceName() belongs
                //   to this object, not to the element
                $qualifiedName = implode(":", array_map([$this, "coerceName"], explode(":", $qualifiedName, 2)));
            } else {
                $qualifiedName = $this->coerceName($qualifiedName);
            }
            $element->setAttributeNS($namespaceURI, $qualifiedName, $value);
            // Record that mangling has occurred on this object only; the
            //   document itself is left unmodified
            $this->mangledAttributes = true;
        }
        // Mark un-namespaced "id" attributes as ID attributes in the DOM
        if ($qualifiedName === "id" && $namespaceURI === null) {
            $element->setIdAttribute($qualifiedName, true);
        }
    }
}
/**
 * Reports whether an element is a MathML text integration point, i.e. one
 * of the MathML elements "mi", "mo", "mn", "ms", or "mtext".
 *
 * @param \DOMElement $e The element to check
 * @return bool True if the element is in the MathML namespace and has one of the listed node names
 */
public function isMathMLTextIntegrationPoint(\DOMElement $e): bool {
    // Strict comparison: node names are always strings, as are the candidates
    return ($e->namespaceURI === Parser::MATHML_NAMESPACE && (in_array($e->nodeName, ['mi', 'mo', 'mn', 'ms', 'mtext'], true)));
}
public function isHTMLIntegrationPoint(Element $e): bool {
public function isHTMLIntegrationPoint(\DOMElement $e): bool {
$encoding = strtolower((string)$e->getAttribute('encoding'));
return ((
$e->namespaceURI === Parser::MATHML_NAMESPACE &&

14
tests/cases/TestTreeConstructor.php

@ -7,7 +7,6 @@ declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
use MensBeam\HTML\Data;
use MensBeam\HTML\Document;
use MensBeam\HTML\LoopException;
use MensBeam\HTML\NotImplementedException;
use MensBeam\HTML\OpenElementsStack;
@ -57,7 +56,7 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
return true;
});
// initialize the output document
$doc = new Document;
$doc = new \DOMDocument;
// prepare the fragment context, if any
if ($fragment) {
$fragment = explode(" ", $fragment);
@ -298,7 +297,7 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
$this->out[] = "| ".str_repeat(" ", $this->depth).$data;
}
protected function serializeTree(Document $d, bool $fragment): array {
protected function serializeTree(\DOMDocument $d, bool $fragment): array {
$this->out = [];
$this->depth = 0;
if ($fragment){
@ -351,13 +350,14 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
if ($e->localName === "template" && $e->namespaceURI === null) {
$this->push("content");
$this->depth++;
foreach ($e->content->childNodes as $n) {
foreach ($e->childNodes as $n) {
$this->serializeNode($n);
}
$this->depth--;
}
foreach ($e->childNodes as $n) {
$this->serializeNode($n);
} else {
foreach ($e->childNodes as $n) {
$this->serializeNode($n);
}
}
$this->depth--;
}

Loading…
Cancel
Save