"", self::MATHML_NAMESPACE => "math", self::SVG_NAMESPACE => "svg", self::XLINK_NAMESPACE => "xlink", self::XML_NAMESPACE => "xml", self::XMLNS_NAMESPACE => "xmlns", ];

    /**
     * Protected constructor used for creating an instance to access properties
     * which must be reset on every parse. Registers the new object as the
     * current parser instance.
     */
    protected function __construct()
    {
        static::$instance = $this;
    }

    /**
     * Tears down the tree builder and unregisters this object as the current
     * parser instance.
     *
     * This is called explicitly by parse() and again by PHP when the object is
     * released, so it must be safe to run more than once and on an instance
     * whose setup never got as far as creating a tree builder.
     */
    public function __destruct()
    {
        // A parse that failed early never created a tree builder.
        if (isset($this->treeBuilder)) {
            $this->treeBuilder->__destruct();
        }

        // Only clear the static pointer when it still refers to this object; a
        // newer instance may already have replaced it.
        if (static::$instance === $this) {
            static::$instance = null;
        }
    }

    /**
     * Parses a string (or file) into a Document.
     *
     * @param string $data The markup to parse. When $file is true this value is
     *                     handed to Data as its second argument instead of its
     *                     first (presumably a file path -- confirm against Data).
     * @param Document|null $document Empty document to parse into. A new
     *                     Document is created when null, unless parse() was
     *                     called by parseFragment(), in which case the document
     *                     prepared there is kept.
     * @param bool $file See $data.
     *
     * @throws Exception Exception::PARSER_NONEMPTY_DOCUMENT when $document
     *                   already has child nodes.
     *
     * @return Document The document the tree builder inserted into.
     */
    public static function parse(string $data, Document $document = null, bool $file = false)
    {
        // Validate the input before touching any parser state so a rejected
        // document does not leave a half-initialized instance behind.
        if (!is_null($document) && $document->hasChildNodes()) {
            throw new Exception(Exception::PARSER_NONEMPTY_DOCUMENT);
        }

        // If parse() is called by parseFragment() then don't create an instance. It has
        // already been created and carries the fragment context, form element and
        // template insertion modes.
        $c = __CLASS__;
        $fragmentCase = (static::$instance instanceof $c) && (bool)static::$instance->fragmentCase;
        if (!$fragmentCase) {
            static::$instance = new $c;
        }

        // Keep a local reference: __destruct() clears static::$instance, but the
        // instance is still needed afterwards to clear the error handler.
        $parser = static::$instance;

        if (!is_null($document)) {
            $parser->DOM = $document;
        } elseif (!$fragmentCase) {
            $parser->DOM = new Document();
        }
        // Otherwise parseFragment() has already installed the document which owns
        // the fragment; replacing it would detach the fragment from its document.

        // Initialize the parse error handler.
        $parser->parseError = new ParseError;
        $parser->parseError->setHandler();

        try {
            // Process the input stream.
            $parser->data = new Data(
                ($file === true) ? '' : $data,
                ($file === true) ? $data : 'STDIN',
                $parser->parseError
            );

            // Set the locale for CTYPE to en_US.UTF8 so ctype functions and strtolower only
            // work on basic latin characters. Used extensively when tokenizing.
            setlocale(LC_CTYPE, 'en_US.UTF8');

            // Initialize the stack of open elements.
            $parser->stack = new OpenElementsStack($parser->fragmentCase, $parser->fragmentContext);

            // Initialize the template insertion modes stack if necessary.
            if (is_null($parser->templateInsertionModes)) {
                $parser->templateInsertionModes = new TemplateInsertionModesStack();
            }

            // Initialize the tokenizer.
            $parser->tokenizer = new Tokenizer($parser->data, $parser->stack, $parser->parseError);

            // When parsing a fragment the tokenizer starts in a state chosen by the
            // context node. This can only be applied once the tokenizer exists.
            if ($fragmentCase) {
                $parser->tokenizer->state = static::fragmentTokenizerState($parser->fragmentContext->nodeName);
            }

            // Initialize the tree builder.
            $parser->treeBuilder = new TreeBuilder(
                $parser->DOM,
                $parser->formElement,
                $parser->fragmentCase,
                $parser->fragmentContext,
                $parser->stack,
                $parser->templateInsertionModes,
                $parser->tokenizer,
                $parser->parseError,
                $parser->data
            );

            // Run the tokenizer. Tokenizer runs until after the EOF token is emitted.
            do {
                $token = $parser->tokenizer->createToken();
                $parser->treeBuilder->emitToken($token);
            } while (!$token instanceof EOFToken);

            // Fix id attributes before outputting.
            $parser->DOM->fixIdAttributes();

            $dom = $parser->DOM;
        } finally {
            try {
                // The Parser instance has no need to exist when finished, whether
                // parsing succeeded or not; leaving it registered would let stale
                // state leak into the next parse.
                $parser->__destruct();
            } finally {
                $parser->parseError->clearHandler();
            }
        }

        return $dom;
    }

    /**
     * Parses a string (or file) as a fragment and returns the resulting
     * DOMDocumentFragment.
     *
     * @param string $data The markup to parse; see parse() for how $file
     *                     changes its meaning.
     * @param Element|null $context Context element. Its owner document is used
     *                     for node creation; a new Document is used when null.
     * @param bool $file Passed through to parse().
     *
     * @return \DOMDocumentFragment The fragment created from the document before
     *                     parsing starts.
     */
    public static function parseFragment(string $data, Element $context = null, bool $file = false): \DOMDocumentFragment
    {
        // Create an instance of this class to use the non static properties.
        $c = __CLASS__;
        static::$instance = new $c;
        $parser = static::$instance;

        $parser->DOM = (is_null($context)) ? new Document() : $context->ownerDocument;

        // Keep a local handle on the fragment: parse() unregisters the parser
        // instance when it finishes, so it cannot be read back from there.
        $fragment = $parser->DOM->createDocumentFragment();
        $parser->DOMFragment = $fragment;

        // DEVIATION: The spec says to let the document be in quirks mode if the
        // DOMDocument is in quirks mode. Cannot check whether the context element is in
        // quirks mode, so going to assume it isn't.

        // DEVIATION: The spec's version of parsing fragments isn't remotely useful in
        // the context this library is intended for use in. This implementation uses a
        // DOMDocumentFragment for inserting nodes into. There's no need to have a
        // different process for when there isn't a context. There will always be one:
        // the DOMDocumentFragment.
        $parser->fragmentContext = (!is_null($context)) ? $context : $fragment;

        $name = $parser->fragmentContext->nodeName;

        # Set the state of the HTML parser's tokenization stage as follows:
        // The tokenizer is not created until parse() runs, so parse() applies the
        // state selected by fragmentTokenizerState() for the fragment context.

        // DEVIATION: Since this implementation uses a DOMDocumentFragment for insertion
        // there is no need to create an html element for inserting stuff into.

        # If the context element is a template element, push "in template" onto the
        # stack of template insertion modes so that it is the new current template
        # insertion mode.
        if ($context instanceof Element && $context->nodeName === 'template') {
            // Stored on the instance, which is where parse() looks for it.
            // NOTE(review): assumes TemplateInsertionModesStack supports []-append.
            $parser->templateInsertionModes = new TemplateInsertionModesStack();
            $parser->templateInsertionModes[] = TreeBuilder::IN_TEMPLATE_MODE;
        }

        # Reset the parser's insertion mode appropriately.
        // DEVIATION: The insertion mode will be always 'in body', not 'before head' if
        // there isn't a context. There isn't a need to reconstruct a valid HTML
        // document when using a DOMDocumentFragment.
        // NOTE(review): this runs before the TreeBuilder is constructed in parse();
        // confirm resetInsertionMode() is static and safe to call at this point.
        TreeBuilder::resetInsertionMode();

        # Set the parser's form element pointer to the nearest node to the context element
        # that is a form element (going straight up the ancestor chain, and including the
        # element itself, if it is a form element), if any. (If there is no such form
        # element, the form element pointer keeps its initial value, null.)
        if (!is_null($context)) {
            $parser->formElement = ($name === 'form') ? $context : DOM::getAncestor('form', $context);
        }

        # Start the parser and let it run until it has consumed all the characters just
        # inserted into the input stream.
        $parser->fragmentCase = true;
        static::parse($data, null, $file);

        # If there is a context element, return the child nodes of root, in tree order.
        # Otherwise, return the children of the Document object, in tree order.
        // DEVIATION: This method will always return a DOMDocumentFragment.
        return $fragment;
    }

    /**
     * Selects the tokenizer state a fragment parse starts in, based on the node
     * name of the fragment context.
     *
     * @param string $name Node name of the fragment context.
     *
     * @return mixed One of the Tokenizer::*_STATE constants.
     */
    protected static function fragmentTokenizerState(string $name)
    {
        switch ($name) {
            case 'title':
            case 'textarea':
                return Tokenizer::RCDATA_STATE;
            case 'style':
            case 'xmp':
            case 'iframe':
            case 'noembed':
            case 'noframes':
                return Tokenizer::RAWTEXT_STATE;
            case 'script':
                return Tokenizer::SCRIPT_STATE;
            case 'noscript':
                return Tokenizer::NOSCRIPT_STATE;
            case 'plaintext':
                return Tokenizer::PLAINTEXT_STATE;
            default:
                return Tokenizer::DATA_STATE;
        }
    }
}