Browse Source

Start on actual sanitization

master
J. King 1 year ago
parent
commit
607cd69fd2
  1. 132
      lib/AbstractSanitizer.php

132
lib/AbstractSanitizer.php

@ -21,6 +21,12 @@ namespace MensBeam\HTML;
* @see https://github.com/WICG/sanitizer-api/issues
*/
abstract class AbstractSanitizer {
/** @var int The sanitizer action to keep a node */
protected const ACTION_KEEP = 0;
/** @var int The sanitizer action to drop a node, which removes it and its children */
protected const ACTION_DROP = 1;
/** @var int The sanitizer action to block a node, which removes it while (possibly) keeping its children; this only applies to element nodes */
protected const ACTION_BLOCK = 2;
/** @var string The HTML namespace */
protected const HTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
/** @var string The MathML namespace */
@ -609,4 +615,130 @@ abstract class AbstractSanitizer {
// return the transformed configuration
return $out;
}
/** Produces a sanitized copy of a document or document fragment, according to the loaded configuration
 *
 * The input itself is never modified: it is first cloned into a new
 * document fragment, and only that clone is sanitized and returned
 *
 * @param \DOMDocument|\DOMDocumentFragment $input The document or document fragment to sanitize
 * @throws \InvalidArgumentException If $input is neither a document nor a document fragment
 */
public function sanitize(\DOMNode $input): \DOMDocumentFragment {
    // The native type is the broader DOMNode, so narrow it manually
    $acceptable = $input instanceof \DOMDocument || $input instanceof \DOMDocumentFragment;
    if (!$acceptable) {
        throw new \InvalidArgumentException("Argument \$input must be of type \DOMDocument|\DOMDocumentFragment");
    }
    # To sanitize a given input of type Document or DocumentFragment
    #   run these steps:
    # Let fragment be the result of running the create a document fragment
    #   algorithm on input.
    $sanitized = $this->createDocumentFragment($input);
    # Run the sanitize a document fragment algorithm on fragment.
    // The working copy is modified in place
    $this->sanitizeDocumentFragment($sanitized);
    # Return fragment.
    return $sanitized;
}
/** Creates a copy of the input as a document fragment, which can then be safely manipulated
 *
 * For a document the copy contains a deep clone of the document element;
 * for a document fragment it contains deep clones of the fragment's
 * children. If there is nothing to clone (a document without a document
 * element, or an empty fragment) an empty document fragment is returned
 *
 * @param \DOMDocument|\DOMDocumentFragment $input The input to clone
 */
protected function createDocumentFragment(\DOMNode $input): \DOMDocumentFragment {
    assert($input instanceof \DOMDocument || $input instanceof \DOMDocumentFragment, new \Exception("Parameter \$input must be of type \DOMDocument|\DOMDocumentFragment"));
    # To create a document fragment named fragment from an input of type
    #   Document or DocumentFragment, run these steps:
    # Let node be null.
    $node = null;
    # Switch based on input’s type:
    #   If input is of type DocumentFragment, then:
    #     Set node to input.
    #   If input is of type Document, then:
    #     Set node to input’s documentElement.
    if ($input instanceof \DOMDocumentFragment) {
        $node = $input;
    } elseif ($input instanceof \DOMDocument) {
        // This is null when the document has no document element
        $node = $input->documentElement;
    }
    # Let fragment be a new DocumentFragment whose node document is node’s
    #   node document.
    // A document is its own node document; its ownerDocument property is
    //   null, so the document itself must be used in that case. Creating
    //   the fragment from the input rather than from node also works
    //   when node is null
    $document = ($input instanceof \DOMDocument) ? $input : $input->ownerDocument;
    $fragment = $document->createDocumentFragment();
    // With nothing to clone we are done. This avoids calling a method on
    //   null for a document without a document element, and avoids
    //   appending an empty fragment, which PHP's DOM extension can reject
    //   with a "Document Fragment is empty" warning
    if ($node === null || ($node instanceof \DOMDocumentFragment && !$node->hasChildNodes())) {
        return $fragment;
    }
    # Let clone be the result of running clone a node on node with the
    #   clone children flag set.
    $clone = $node->cloneNode(true);
    # Append the node clone to fragment.
    // When the clone is itself a fragment, appending it moves its children
    //   into the new fragment
    $fragment->appendChild($clone);
    # Return fragment.
    return $fragment;
}
/** Iterates over a document fragment and sanitizes its nodes
 *
 * The fragment is modified in place. Each node is visited in tree order
 * and, depending on the action chosen for it, is kept, dropped (removed
 * along with its children), or blocked (removed, with its children taking
 * its place in its parent)
 *
 * @param \DOMDocumentFragment $fragment The document fragment to sanitize
 */
protected function sanitizeDocumentFragment(\DOMDocumentFragment $fragment): void {
    # To sanitize a document fragment named fragment with a Sanitizer
    #   sanitizer run these steps:
    # Let m be a map that maps nodes to a sanitize action.
    # Let nodes be a list containing the inclusive descendants of fragment, in tree order.
    // Basically we won't be doing things this way. Rather than treating
    //   all of the fragment's nodes as a flat list, we'll walk the
    //   fragment and decide as we go, till we have visited every node
    $node = $fragment->firstChild;
    # For each node in nodes:
    while ($node) {
        # Let action be the result of running the sanitize a node algorithm on node with sanitizer.
        $action = $this->sanitizeNode($node);
        assert(in_array($action, [self::ACTION_BLOCK, self::ACTION_DROP, self::ACTION_KEEP], true), new \Exception("The result of sanitizing a node must be one of the three actions"));
        # Set m[node] to action.
        # For each node in nodes:
        // Again, we're taking action one node at a time; m[node] is simply $action
        if ($action === self::ACTION_DROP) {
            # If m[node] is drop, remove node.
            // The next node must be found before the removal, while the
            //   victim is still attached; its children are skipped
            //   because they are removed along with it
            $victim = $node;
            $node = $this->nextNode($node, false);
            $victim->parentNode->removeChild($victim);
        } elseif ($action === self::ACTION_BLOCK) {
            # If m[node] is block, create a DocumentFragment fragment,
            #   append all of node’s children to fragment, and replace
            #   node within node’s parent with fragment.
            // This is a bit confusing because the variable name "fragment"
            //   is re-used; this is a different fragment from the input
            //   to this function
            // The next node is found first: if the victim has children
            //   it is the first child, which ends up in the victim's
            //   former position and so is still visited
            $victim = $node;
            $node = $this->nextNode($node, true);
            if ($victim->hasChildNodes()) {
                $frag = $victim->ownerDocument->createDocumentFragment();
                while ($victim->firstChild) {
                    $frag->appendChild($victim->firstChild);
                }
                // DOMNode::replaceChild() takes the new node first and
                //   the node being replaced second
                $victim->parentNode->replaceChild($frag, $victim);
            } else {
                // With no children to keep, blocking amounts to removal;
                //   this avoids inserting an empty fragment
                $victim->parentNode->removeChild($victim);
            }
        } else {
            # If m[node] is keep, do nothing.
            $node = $this->nextNode($node, true);
        }
    }
}
/** Determines which sanitizer action applies to a single node
 *
 * The caller, sanitizeDocumentFragment(), expects the result to be one of
 * ACTION_KEEP, ACTION_DROP, or ACTION_BLOCK
 *
 * NOTE(review): the body is currently empty, so nothing is returned
 * despite the declared int return type; PHP raises a TypeError in that
 * situation when the method is called. This is presumably a placeholder
 * awaiting an implementation; confirm before relying on it
 *
 * @param \DOMNode $node The node to evaluate
 */
protected function sanitizeNode(\DOMNode $node): int {
}
/** Finds the next node in tree order after $node, if any
 *
 * The first child is returned when children are considered and exist.
 * Otherwise the result is the next sibling of the node itself or, failing
 * that, of its nearest ancestor which has one. Null is returned when the
 * top of the tree is reached without finding a following node
 *
 * @param \DOMNode $node The context node
 * @param bool $considerChildren Whether or not child nodes are valid next nodes
 */
protected function nextNode(\DOMNode $node, bool $considerChildren): ?\DOMNode {
    // Descend first, if allowed and possible
    if ($considerChildren && $node->hasChildNodes()) {
        return $node->firstChild;
    }
    // Otherwise walk up from the node itself through its ancestors,
    //   stopping at the first one which has a following sibling
    for ($cursor = $node; $cursor !== null; $cursor = $cursor->parentNode) {
        $sibling = $cursor->nextSibling;
        if ($sibling !== null) {
            return $sibling;
        }
    }
    // Ran out of ancestors: there is no next node
    return null;
}
}

Loading…
Cancel
Save