Browse Source

Start on infoset coercion

split-manual
J. King 3 years ago
parent
commit
5d9b1a3a68
  1. 1
      RoboFile.php
  2. 41
      lib/DOM/Document.php
  3. 11
      lib/DOM/traits/EscapeString.php
  4. 9
      lib/TreeBuilder.php

1
RoboFile.php

@ -6,6 +6,7 @@ const BASE = __DIR__.\DIRECTORY_SEPARATOR;
// Path of the "tests" directory under BASE, with a trailing directory separator
const BASE_TEST = BASE."tests".\DIRECTORY_SEPARATOR;
// True when the PHP_WINDOWS_VERSION_MAJOR constant is defined (PHP only defines it on Windows)
define("IS_WIN", defined("PHP_WINDOWS_VERSION_MAJOR"));
// True when the operating system name reported by php_uname() is "Darwin" (macOS)
define("IS_MAC", php_uname("s") === "Darwin");
// Turn off all PHP error reporting for the remainder of this script
error_reporting(0);
function norm(string $path): string {
$out = realpath($path);

41
lib/DOM/Document.php

@ -3,7 +3,7 @@ declare(strict_types=1);
namespace dW\HTML5;
class Document extends \DOMDocument {
use Descendant, Serialize;
use Descendant, Serialize, EscapeString;
// Quirks mode constants
public const NO_QUIRKS_MODE = 0;
@ -11,6 +11,8 @@ class Document extends \DOMDocument {
public const LIMITED_QUIRKS_MODE = 2;
public $quirksMode = self::NO_QUIRKS_MODE;
public $mangledElements = false;
public $mangledAttributes = false;
// An array of all template elements created in the document
// This exists because values of properties on derived DOM classes
@ -87,21 +89,38 @@ class Document extends \DOMDocument {
}
/**
 * Creates an element, coercing names which XML cannot represent.
 *
 * The name is first passed to \DOMDocument::createElement() unchanged. If
 * that raises a DOMException the name is run through CoerceName() and
 * creation is retried once with the coerced name; $mangledElements is set
 * to true so that the coercion can later be detected.
 *
 * Elements named "template" are recorded in $templateElements and given a
 * document fragment in their "content" property.
 *
 * @param string $name  The element name
 * @param string $value The element value, passed through to the parent method
 * @return mixed The element as returned by \DOMDocument::createElement()
 * @throws \DOMException If creation fails for a reason coercion cannot fix
 */
public function createElement($name, $value = "") {
    try {
        $e = parent::createElement($name, $value);
    } catch (\DOMException $exception) {
        // The element name is invalid for XML
        // Replace any offending characters with "UHHHHHH" where H is the
        // uppercase hexadecimal digits of the character's code point
        $coerced = $this->CoerceName((string) $name);
        if ($coerced === (string) $name) {
            // Coercion changed nothing, so the failure has another cause;
            // retrying would only fail again
            throw $exception;
        }
        $this->mangledElements = true;
        // A coerced name always contains an escape sequence and so can
        // never be "template"; no template bookkeeping is needed here
        return parent::createElement($coerced, $value);
    }
    if ($name === "template") {
        $this->templateElements[] = $e;
        $e->content = $this->createDocumentFragment();
    }
    return $e;
}
/**
 * Creates a namespaced element, coercing names which XML cannot represent.
 *
 * The qualified name is first passed to \DOMDocument::createElementNS()
 * unchanged. If that raises a DOMException the name is run through
 * CoerceName() and creation is retried once with the coerced name;
 * $mangledElements is set to true so that the coercion can later be
 * detected.
 *
 * Elements named "template" with a null namespace are recorded in
 * $templateElements and given a document fragment in their "content"
 * property.
 *
 * @param string|null $namespaceURI  The namespace URI of the element
 * @param string      $qualifiedName The qualified name of the element
 * @param string      $value         The element value, passed through to the parent method
 * @return mixed The element as returned by \DOMDocument::createElementNS()
 * @throws \DOMException If creation fails for a reason coercion cannot fix
 */
public function createElementNS($namespaceURI, $qualifiedName, $value = "") {
    try {
        $e = parent::createElementNS($namespaceURI, $qualifiedName, $value);
    } catch (\DOMException $exception) {
        // The element name is invalid for XML
        // Replace any offending characters with "UHHHHHH" where H is the
        // uppercase hexadecimal digits of the character's code point
        $coerced = $this->CoerceName((string) $qualifiedName);
        if ($coerced === (string) $qualifiedName) {
            // Coercion changed nothing, so the failure has another cause;
            // retrying would only fail again
            throw $exception;
        }
        $this->mangledElements = true;
        // A coerced name always contains an escape sequence and so can
        // never be "template"; no template bookkeeping is needed here
        return parent::createElementNS($namespaceURI, $coerced, $value);
    }
    if ($qualifiedName === "template" && $namespaceURI === null) {
        $this->templateElements[] = $e;
        $e->content = $this->createDocumentFragment();
    }
    return $e;
}
public function __toString() {

11
lib/DOM/traits/EscapeString.php

@ -22,4 +22,13 @@ trait EscapeString {
return $string;
}
}
/**
 * Coerces a name into one which is a valid XML 1.0 name without colons.
 *
 * Every character which XML does not allow at its position is replaced by
 * "UHHHHHH", where HHHHHH is the character's code point as six uppercase
 * hexadecimal digits. The first character must match NameStartChar and
 * later characters NameChar; ":" is rejected everywhere.
 * See https://www.w3.org/TR/REC-xml/#NT-NameStartChar
 *
 * @param string $name The name to coerce, encoded as UTF-8
 * @return string The coerced name; identical to the input if it was already valid
 * @throws \InvalidArgumentException If the name cannot be processed (e.g. it is not valid UTF-8)
 */
protected function CoerceName(string $name): string {
    // The production of NameStartChar in XML 1.0, with the added
    // exclusion of ":" from allowed characters
    $nameStart = 'A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
    // NameChar additionally allows these characters after the first
    $nameChar = '\-\.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}'.$nameStart;
    $escape = static function(array $match): string {
        return "U".str_pad(strtoupper(dechex(mb_ord($match[0], "UTF-8"))), 6, "0", \STR_PAD_LEFT);
    };
    // Apply the stricter rule to the first character before the general
    // rule; the escape sequence itself consists only of valid name
    // characters and starts with a valid start character, so the second
    // pass never touches the output of the first
    $out = preg_replace_callback('/^[^'.$nameStart.']/u', $escape, $name);
    if ($out !== null) {
        $out = preg_replace_callback('/[^'.$nameChar.']/u', $escape, $out);
    }
    if ($out === null) {
        // PCRE reports failure (typically malformed UTF-8) by returning null
        throw new \InvalidArgumentException("Name could not be coerced: ".preg_last_error_msg());
    }
    return $out;
}
}

9
lib/TreeBuilder.php

@ -3347,6 +3347,7 @@ class TreeBuilder {
$formattingElementIndex = $this->activeFormattingElementsList->findToMarker($subject);
if ($formattingElementIndex > -1) {
$formattingElement = $this->activeFormattingElementsList[$formattingElementIndex]['element'];
$formattingToken = $this->activeFormattingElementsList[$formattingElementIndex]['token'];
} else {
$formattingElement = null;
}
@ -3465,8 +3466,9 @@ class TreeBuilder {
$element = $this->createElementForToken($token, null, $commonAncestor);
$this->activeFormattingElementsList[$nodeListPos] = ['token' => $nodeToken, 'element' => $element];
$this->stack[$nodeIndex] = $element;
$node = $element;
# If last node is furthest block, then move the aforementioned
# to be immediately after the new node in the list of
# bookmark to be immediately after the new node in the list of
# active formatting elements.
if ($lastNode->isSameNode($furthestBlock)) {
$bookmark = $nodeListPos + 1;
@ -3494,7 +3496,6 @@ class TreeBuilder {
# Create an element for the token for which formatting element was
# created, in the HTML namespace, with furthest block as the
# intended parent.
$formattingToken = $this->activeFormattingElementsList[$formattingElementIndex]['token'];
$element = $this->createElementForToken($formattingToken, null, $furthestBlock);
# Take all of the child nodes of furthest block and append them to
# the element created in the last step.
@ -3511,9 +3512,9 @@ class TreeBuilder {
# Remove formatting element from the stack of open elements, and
# insert the new element into the stack of open elements
# immediately below the position of furthest block in that stack.
$this->stack->insert($element, $this->stack->findSame($furthestBlock));
assert($stackIndex > 0, new \Exception("Attempting to delete root element from stack"));
unset($this->stack[$this->stack->findSame($formattingElement)]);
$this->stack->removeSame($formattingElement);
$this->stack->insert($element, $this->stack->findSame($furthestBlock) + 1);
# Jump back to the step labeled outer loop.
goto OuterLoop;
}

Loading…
Cancel
Save