Browse Source

Hopefully complete serializer implementation

serialize
J. King 3 years ago
parent
commit
c82127c61f
  1. 132
      lib/Parser/Serializer.php

132
lib/Parser/Serializer.php

@ -14,7 +14,7 @@ abstract class Serializer {
protected const VOID_ELEMENTS = ["basefont", "bgsound", "frame", "keygen", "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"]; protected const VOID_ELEMENTS = ["basefont", "bgsound", "frame", "keygen", "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"];
protected const RAWTEXT_ELEMENTS = ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext"]; protected const RAWTEXT_ELEMENTS = ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext"];
public function serializeOuter(\DOMNode $node): string { public static function serializeOuter(\DOMNode $node): string {
$s = ""; $s = "";
$stack = []; $stack = [];
$n = $node; $n = $node;
@ -45,7 +45,47 @@ abstract class Serializer {
# value, escaped as described below in attribute mode, and # value, escaped as described below in attribute mode, and
# a second U+0022 QUOTATION MARK character ("). # a second U+0022 QUOTATION MARK character (").
foreach ($n->attributes as $a) { foreach ($n->attributes as $a) {
$s .= " ".self::serializeAttribute($a); # An attribute's serialized name for the purposes of the previous
# paragraph must be determined as follows:
# If the attribute has no namespace
if ($a->namespaceURI === null) {
# The attribute's serialized name is the attribute's local name.
$name = self::uncoerceName($a->localName);
}
# If the attribute is in the XML namespace
elseif ($a->namespaceURI === Parser::XML_NAMESPACE) {
# The attribute's serialized name is the string "xml:" followed
# by the attribute's local name.
$name = "xml:".self::uncoerceName($a->localName);
}
# If the attribute is in the XMLNS namespace...
elseif ($a->namespaceURI === Parser::XMLNS_NAMESPACE) {
# ... and the attribute's local name is xmlns
if ($a->localName === "xmlns") {
# The attribute's serialized name is the string "xmlns".
$name = "xmlns";
}
# ... and the attribute's local name is not xmlns
else {
# The attribute's serialized name is the string "xmlns:"
# followed by the attribute's local name.
$name = "xmlns:".self::uncoerceName($a->localName);
}
}
# If the attribute is in the XLink namespace
elseif ($a->namespaceURI === Parser::XLINK_NAMESPACE) {
# The attribute's serialized name is the string "xlink:"
# followed by the attribute's local name.
$name = "xlink:".self::uncoerceName($a->localName);
}
# If the attribute is in some other namespace
else {
# The attribute's serialized name is the attribute's qualified name.
$name = $a->name;
}
$value = self::escapeString($a->value);
$s .= " $name=\"$value\"";
} }
# Append a U+003E GREATER-THAN SIGN character (>). # Append a U+003E GREATER-THAN SIGN character (>).
$s .= ">"; $s .= ">";
@ -58,7 +98,19 @@ abstract class Serializer {
# character (/), tagname again, and finally a # character (/), tagname again, and finally a
# U+003E GREATER-THAN SIGN character (>). # U+003E GREATER-THAN SIGN character (>).
if (($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && !in_array($tagName, self::VOID_ELEMENTS)) { if (($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && !in_array($tagName, self::VOID_ELEMENTS)) {
if ($n->hasChildNodes()) { # If the node is a template element, then let the node instead
# be the template element's template contents
# (a DocumentFragment node).
if (
$n instanceof \DOMElement
&& ($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE
&& $n->tagName === "template"
&& property_exists($n, "content")
&& $n->content instanceof \DOMDocumentFragment
) {
// NOTE: Treat template contents as any other document fragment and just invoke the inner serializer
$s .= self::serializeInner($n->content)."</$tagName>";
} elseif ($n->hasChildNodes()) {
$stack[] = $tagName; $stack[] = $tagName;
$n = $n->firstChild; $n = $n->firstChild;
continue; continue;
@ -74,7 +126,8 @@ abstract class Serializer {
# if the parent of current node is a noscript element # if the parent of current node is a noscript element
# and scripting is enabled for the node, then append # and scripting is enabled for the node, then append
# the value of current node's data IDL attribute literally. # the value of current node's data IDL attribute literally.
if (($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && in_array($n->parentNode->tagName, self::RAWTEXT_ELEMENTS)) { $p = $n->parentNode;
if ($p instanceof \DOMElement && ($p->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && in_array($p->tagName, self::RAWTEXT_ELEMENTS)) {
// NOTE: scripting is assumed not to be enabled // NOTE: scripting is assumed not to be enabled
$s .= $n->data; $s .= $n->data;
} }
@ -135,56 +188,31 @@ abstract class Serializer {
return $s; return $s;
} }
protected static function serializeAttribute(\DOMAttr $a): string { public static function serializeInner(\DOMNode $node): string {
# For each attribute that the element has, append a # Let s be a string, and initialize it to the empty string.
# U+0020 SPACE character, the attribute's serialized name as $s = "";
# described below, a U+003D EQUALS SIGN character (=), a # If the node serializes as void, then return the empty string.
# U+0022 QUOTATION MARK character ("), the attribute's # If the node is a template element, then let the node instead
# value, escaped as described below in attribute mode, and # be the template element's template contents
# a second U+0022 QUOTATION MARK character ("). # (a DocumentFragment node).
// NOTE: We won't add the space here; it's only appropriate if ($node instanceof \DOMElement && ($node->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) {
// if serializing an element. if (in_array($node->tagName, self::VOID_ELEMENTS)) {
return "";
# An attribute's serialized name for the purposes of the previous } elseif ($node->tagName === "template" && property_exists($node, "content") && $node->content instanceof \DOMDocumentFragment) {
# paragraph must be determined as follows: // NOTE: template elements won't necessarily have a content
// property because PHP's DOM does not support this natively
# If the attribute has no namespace $node = $node->content;
if ($a->namespaceURI === null) {
# The attribute's serialized name is the attribute's local name.
$name = self::uncoerceName($a->localName);
}
# If the attribute is in the XML namespace
elseif ($a->namespaceURI === Parser::XML_NAMESPACE) {
# The attribute's serialized name is the string "xml:" followed
# by the attribute's local name.
$name = "xml:".self::uncoerceName($a->localName);
}
# If the attribute is in the XMLNS namespace...
elseif ($a->namespaceURI === Parser::XMLNS_NAMESPACE) {
# ... and the attribute's local name is xmlns
if ($a->localName === "xmlns") {
# The attribute's serialized name is the string "xmlns".
$name = "xmlns";
}
# ... and the attribute's local name is not xmlns
else {
# The attribute's serialized name is the string "xmlns:"
# followed by the attribute's local name.
$name = "xmlns:".self::uncoerceName($a->localName);
} }
} }
# If the attribute is in the XLink namespace if ($node instanceof \DOMElement || $node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment) {
elseif ($a->namespaceURI === Parser::XLINK_NAMESPACE) { # For each child node of the node, in tree order, run the following steps:
# The attribute's serialized name is the string "xlink:" // NOTE: the steps in question are implemented in the "serializeOuter" routine
# followed by the attribute's local name. foreach ($node->childNodes as $n) {
$name = "xlink:".self::uncoerceName($a->localName); $s .= self::serializeOuter($n);
} }
# If the attribute is in some other namespace } else {
else { throw new Exception(Exception::UNSUPPORTED_NODE_TYPE, [get_class($node)]);
# The attribute's serialized name is the attribute's qualified name.
$name = $a->name;
} }
$value = self::escapeString($a->value); return $s;
return "$name=\"$value\"";
} }
} }

Loading…
Cancel
Save