Browse Source

Added innerHTML to Element, getting of outerHTML, started on setting

split-manual
Dustin Wilson 3 years ago
parent
commit
c1bf8c983b
  1. 2
      .gitignore
  2. 54
      lib/DOM/DOMException.php
  3. 137
      lib/DOM/Element.php
  4. 2
      lib/DOM/traits/Compare.php
  5. 6
      lib/DOM/traits/EscapeString.php
  6. 26
      lib/DOM/traits/Serialize.php
  7. 50
      lib/Exception.php

2
.gitignore

@ -1,5 +1,5 @@
# html5-parser specific
test.php
test*.php
# General
*.DS_Store

54
lib/DOM/DOMException.php

@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
/**
 * DOM-related exception whose message is looked up from its error code.
 *
 * The constructor takes an error code followed by the values to substitute
 * into that code's message template. A \Throwable passed as the first or the
 * last extra argument is treated as the previous exception rather than as a
 * substitution value.
 */
class DOMException extends \DOMException {
    // From PHP's DOMException; keeping error codes consistent
    const NO_MODIFICATION_ALLOWED = 7;

    const DOCUMENT_DOCUMENTFRAG_EXPECTED = 100;
    const STRING_OR_CLOSURE_EXPECTED = 101;
    const OUTER_HTML_FAILED_NOPARENT = 102;

    // Message templates keyed by error code; "%s" style placeholders are
    // filled from the constructor's extra arguments.
    protected static $messages = [
          7 => 'Modification not allowed here',
        100 => 'Element, Document, or DOMDocumentFragment expected; found %s',
        101 => 'The first argument must either be an instance of \DOMNode, a string, or a closure; found %s',
        102 => 'Failed to set the "outerHTML" property; the element does not have a parent node',
    ];

    /**
     * @param int   $code    One of the error code constants of this class.
     * @param mixed ...$args Values for the message placeholders, optionally
     *                       with a previous \Throwable as the first or last item.
     *
     * @throws Exception When $code has no message, or when the number of
     *                   supplied values does not match the placeholders.
     */
    public function __construct(int $code, ...$args) {
        if (!isset(static::$messages[$code])) {
            throw new Exception(Exception::INVALID_CODE);
        }

        $message = static::$messages[$code];
        $previous = null;

        if ($args) {
            // Grab a previous exception if there is one.
            if ($args[0] instanceof \Throwable) {
                $previous = array_shift($args);
            } elseif (end($args) instanceof \Throwable) {
                $previous = array_pop($args);
            }
        }

        // Count the number of replacements needed in the message. Identical
        // placeholders are counted once, so a positional placeholder such as
        // "%1$s" may appear several times while needing a single argument.
        preg_match_all('/(\%(?:\d+\$)?s)/', $message, $matches);
        $count = count(array_unique($matches[1]));

        // If the number of replacements doesn't match the arguments then oops.
        if (count($args) !== $count) {
            throw new Exception(Exception::INCORRECT_PARAMETERS_FOR_MESSAGE, $count);
        }

        if ($count > 0) {
            // Substitute the arguments into the message template.
            $message = sprintf($message, ...$args);
        }

        parent::__construct($message, $code, $previous);
    }
}

137
lib/DOM/Element.php

@ -10,8 +10,6 @@ class Element extends \DOMElement {
// Used for template elements
public $content = null;
protected const SELF_CLOSING_ELEMENTS = ['area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
public function setAttribute($name, $value) {
try {
parent::setAttribute($name, $value);
@ -24,7 +22,7 @@ class Element extends \DOMElement {
parent::setAttribute($name, $value);
}
}
public function setAttributeNS($namespaceURI, $qualifiedName, $value) {
try {
parent::setAttributeNS($namespaceURI, $qualifiedName, $value);
@ -38,6 +36,120 @@ class Element extends \DOMElement {
}
}
/**
 * Magic getter implementing the read side of the "innerHTML" and "outerHTML"
 * properties from the DOM Parsing Specification.
 *
 * @param string $prop Name of the property being read.
 * @return string|null Serialized markup for "innerHTML" or "outerHTML";
 *                     null for any other property name.
 */
public function __get(string $prop) {
    if ($prop === 'innerHTML') {
        ### DOM Parsing Specification ###
        # 2.3 The InnerHTML mixin
        #
        # On getting, return the result of invoking the fragment serializing algorithm
        # on the context object providing true for the require well-formed flag (this
        # might throw an exception instead of returning a string).
        // DEVIATION: Parsing of XML documents will not be handled by this
        // implementation, so there's no need for the well-formed flag.
        return $this->serialize($this);
    }

    if ($prop === 'outerHTML') {
        ### DOM Parsing Specification ###
        # 2.4 Extensions to the Element interface
        # outerHTML
        #
        # On getting, return the result of invoking the fragment serializing algorithm
        # on a fictional node whose only child is the context object providing true for
        # the require well-formed flag (this might throw an exception instead of
        # returning a string).
        // DEVIATION: Parsing of XML documents will not be handled by this
        // implementation, so there's no need for the well-formed flag.
        // OPTIMIZATION: When following the instructions above the fragment serializing
        // algorithm (Element::serialize) would invoke Element::__toString, so just
        // doing that instead of multiple function calls.
        return $this->__toString();
    }

    // Any other property name yields no value.
    return null;
}
/**
 * Magic setter implementing the write side of the "innerHTML" and "outerHTML"
 * properties from the DOM Parsing Specification.
 *
 * Setting "innerHTML" parses the value as a fragment with this element as
 * context and replaces the element's children with it (or, for a template
 * element, replaces its template contents). Setting "outerHTML" currently
 * only performs the parent checks; the replacement itself is not implemented
 * yet. Any other property name is ignored.
 *
 * @param string $prop  Name of the property being written.
 * @param mixed  $value Markup to parse; passed to Parser::parse as-is.
 *
 * @throws DOMException When setting "outerHTML" on an element with no parent
 *                      or whose parent is a Document.
 */
public function __set(string $prop, $value) {
    switch ($prop) {
        case 'innerHTML':
            ### DOM Parsing Specification ###
            # 2.3 The InnerHTML mixin
            #
            # On setting, these steps must be run:
            # 1. Let context element be the context object's host if the context object is a
            # ShadowRoot object, or the context object otherwise.
            // DEVIATION: There is no scripting in this implementation.

            # 2. Let fragment be the result of invoking the fragment parsing algorithm with
            # the new value as markup, and with context element.
            $frag = Parser::parse($value, $this->ownerDocument, $this->ownerDocument->documentEncoding, $this);

            # 3. If the context object is a template element, then let context object be the
            # template's template contents (a DocumentFragment).
            if ($this->nodeName === 'template') {
                $this->content = $frag;
            }
            # 4. Replace all with fragment within the context object.
            else {
                # To replace all with a node within a parent, run these steps:
                #
                # 1. Let removedNodes be parent’s children.
                // DEVIATION: removedNodes is used below for scripting. There is no scripting in
                // this implementation.

                # 2. Let addedNodes be the empty set.
                // DEVIATION: addedNodes is used below for scripting. There is no scripting in
                // this implementation.

                # 3. If node is a DocumentFragment node, then set addedNodes to node’s
                # children.
                // DEVIATION: Again, there is no scripting in this implementation.

                # 4. Otherwise, if node is non-null, set addedNodes to « node ».
                // DEVIATION: Yet again, there is no scripting in this implementation.

                # 5. Remove all parent’s children, in tree order, with the suppress observers
                # flag set.
                // DEVIATION: There are no observers to suppress as there is no scripting in
                // this implementation.
                while ($this->hasChildNodes()) {
                    $this->removeChild($this->firstChild);
                }

                # 6. If node is non-null, then insert node into parent before null with the
                # suppress observers flag set.
                // The node here is the fragment parsed in step 2 of the setter; appending it
                // places its contents into the now-empty element.
                $this->appendChild($frag);

                # 7. If either addedNodes or removedNodes is not empty, then queue a tree
                # mutation record for parent with addedNodes, removedNodes, null, and null.
                // DEVIATION: There are no mutation records to queue as there is no scripting
                // in this implementation.
            }
        break;

        case 'outerHTML':
            ### DOM Parsing Specification ###
            # 2.4 Extensions to the Element interface
            # outerHTML
            #
            # On setting, the following steps must be run:
            # 1. Let parent be the context object's parent.
            $parent = $this->parentNode;

            # 2. If parent is null, terminate these steps. There would be no way to obtain a
            # reference to the nodes created even if the remaining steps were run.
            // The spec is unclear here as to what to do. What do you return? Most browsers
            // throw an exception here, so that's what we're going to do.
            if ($parent === null) {
                throw new DOMException(DOMException::OUTER_HTML_FAILED_NOPARENT);
            }
            # 3. If parent is a Document, throw a "NoModificationAllowedError" DOMException.
            elseif ($parent instanceof Document) {
                throw new DOMException(DOMException::NO_MODIFICATION_ALLOWED);
            }

            # 4. If parent is a DocumentFragment, let parent be a new Element with:
            // TODO: The remaining steps of the outerHTML setter are not implemented yet;
            // after the checks above the assignment currently has no effect.
        break;
    }
}
public function __toString(): string {
# If current node is an element in the HTML namespace, the MathML namespace,
# or the SVG namespace, then let tagname be current node’s local name.
@ -48,9 +160,21 @@ class Element extends \DOMElement {
$tagName = $this->nodeName;
}
// Since tag names can contain characters that are invalid in PHP's XML DOM
// uncoerce the name when printing.
if (strpos($tagName, 'U') !== false) {
$tagName = $this->uncoerceName($tagName);
}
# Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
$s = "<$tagName";
# If current node's is value is not null, and the element does not have an is
# attribute in its attribute list, then append the string " is="", followed by
# current node's is value escaped as described below in attribute mode, followed
# by a U+0022 QUOTATION MARK character (").
// DEVIATION: There is no scripting support in this implementation.
# For each attribute that the element has, append a U+0020 SPACE character,
# the attribute’s serialized name as described below, a U+003D EQUALS SIGN
# character (=), a U+0022 QUOTATION MARK character ("), the attribute’s value,
@ -113,10 +237,9 @@ class Element extends \DOMElement {
# Append a U+003E GREATER-THAN SIGN character (>).
$s .= '>';
# If current node is an area, base, basefont, bgsound, br, col, embed, frame,
# hr, img, input, link, meta, param, source, track or wbr element, then continue
# on to the next child node at this point.
if (in_array($tagName, self::SELF_CLOSING_ELEMENTS)) {
# If current node serializes as void, then continue on to the next child node at
# this point.
if ($this->serializesAsVoid()) {
return $s;
}

2
lib/DOM/traits/Compare.php

@ -17,7 +17,7 @@ trait Compare {
return $context;
}
} else {
throw new Exception(Exception::DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED, gettype($needle));
throw new DOMException(DOMException::STRING_OR_CLOSURE_EXPECTED, gettype($needle));
}
return null;

6
lib/DOM/traits/EscapeString.php

@ -18,11 +18,7 @@ trait EscapeString {
# 4. If the algorithm was not invoked in the attribute mode, replace any
# occurrences of the "&lt;" character by the string "&amp;lt;", and any
# occurrences of the "&gt;" character by the string "&amp;gt;".
if ($attribute) {
$string = str_replace(['&quot;', '&lt;', '&gt;'], ['&amp;quot;', '&amp;lt;', '&amp;gt;'], $string);
}
return $string;
return ($attribute) ? str_replace('&quot;', '&amp;quot;', $string) : str_replace(['&lt;', '&gt;'], ['&amp;lt;', '&amp;gt;'], $string);
}
protected function coerceName(string $name): string {

26
lib/DOM/traits/Serialize.php

@ -3,21 +3,35 @@ declare(strict_types=1);
namespace dW\HTML5;
trait Serialize {
/**
 * Reports whether this node "serializes as void", i.e. is written without
 * child content or an end tag by the HTML fragment serializing algorithm.
 *
 * Per the HTML Standard an element serializes as void when it is one of the
 * void elements (area, base, br, col, embed, hr, img, input, link, meta,
 * source, track, wbr) or is a basefont, bgsound, frame, keygen, or param
 * element.
 *
 * @return bool True when this node's name is in that list, false otherwise.
 */
protected function serializesAsVoid(): bool {
    // Strict comparison so that only exact name matches count.
    return in_array($this->nodeName, [
        'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame',
        'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source',
        'track', 'wbr',
    ], true);
}
protected function serialize(\DOMNode $node = null): string {
if (is_null($node)) {
$node = $this;
}
if (!$node instanceof Element && !$node instanceof Document && !$node instanceof DocumentFragment) {
throw new Exception(Exception::DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
throw new DOMException(DOMException::DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
}
# 8.3. Serializing HTML fragments
# 13.3. Serializing HTML fragments
#
# 1. Let s be a string, and initialize it to the empty string.
# 1. If the node serializes as void, then return the empty string.
if ($this->serializesAsVoid()) {
return '';
}
# 2. Let s be a string, and initialize it to the empty string.
$s = '';
# 2. If the node is a template element, then let the node instead be the
# 3. If the node is a template element, then let the node instead be the
# template element’s template contents (a DocumentFragment node).
if ($node instanceof Element && $node->nodeName === 'template') {
$node = $node->content;
@ -43,7 +57,7 @@ trait Serialize {
$start = 1;
}
# 3. For each child node of the node, in tree order, run the following steps:
# 4. For each child node of the node, in tree order, run the following steps:
for ($i = $start; $i < $nodesLength; $i++) {
# 1. Let current node be the child node being processed.
# 2. Append the appropriate string from the following list to s:
@ -51,7 +65,7 @@ trait Serialize {
}
}
# 4. The result of the algorithm is the string s.
# 5. Return s.
return $s;
}
}

50
lib/Exception.php

@ -17,41 +17,35 @@ class Exception extends \Exception {
const DATA_NODATA = 10301;
const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10302;
const DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED = 10401;
const DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10402;
const TOKENIZER_INVALID_STATE = 10401;
const TOKENIZER_INVALID_STATE = 10501;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10501;
const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10502;
const TREEBUILDER_UNEXPECTED_END_OF_FILE = 10503;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10601;
const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10602;
const TREEBUILDER_UNEXPECTED_END_OF_FILE = 10603;
protected static $messages = [
10000 => 'Invalid error code',
10001 => 'Unknown error; escaping',
10002 => 'Incorrect number of parameters for Exception message; %s expected',
const DOM_DISABLED_METHOD = 10701;
10101 => 'Non-empty Document supplied as argument for Parser',
protected static $messages = [10000 => 'Invalid error code',
10001 => 'Unknown error; escaping',
10002 => 'Incorrect number of parameters for Exception message; %s expected',
10201 => '%s is an invalid Stack index',
10202 => 'Element, Document, or DOMDocumentFragment expected for fragment context',
10203 => 'Element, string, or array expected',
10203 => 'String or array expected',
10101 => 'Non-empty Document supplied as argument for Parser',
10301 => 'Data string expected; found %s',
10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10201 => '%s is an invalid Stack index',
10202 => 'Element, Document, or DOMDocumentFragment expected for fragment context',
10203 => 'Element, string, or array expected',
10203 => 'String or array expected',
10401 => 'The Tokenizer has entered an invalid state',
10301 => 'Data string expected; found %s',
10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',
10501 => 'Form element expected, found %s',
10502 => 'Element, Document, or DOMDocumentFragment expected; found %s',
10503 => 'Unexpected end of file',
10401 => 'The first argument must either be an instance of \DOMNode, a string, or a closure; found %s',
10402 => 'Element, Document, or DOMDocumentFragment expected; found %s',
10501 => 'The Tokenizer has entered an invalid state',
10601 => 'Form element expected, found %s',
10602 => 'Element, Document, or DOMDocumentFragment expected; found %s',
10603 => 'Unexpected end of file',
10701 => 'Method %1$s::%2$s has been disabled from %1$s'];
10601 => 'Method %1$s::%2$s has been disabled from %1$s'
];
public function __construct(int $code, ...$args) {
if (!isset(static::$messages[$code])) {
@ -78,7 +72,7 @@ class Exception extends \Exception {
if (count($args) !== $count) {
throw new Exception(self::INCORRECT_PARAMETERS_FOR_MESSAGE, $count);
}
if ($count > 0) {
// Go through each of the arguments and run sprintf on the strings.
$message = call_user_func_array('sprintf', array_merge([$message], $args));

Loading…
Cancel
Save