diff --git a/README.md b/README.md index f0bc15e..36caeaa 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,3 @@ -# HTML # +# HTML DOM # -Tools for parsing and printing HTML5 documents and fragments. - -```php -Ook!

Ook!

Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.

Eek!

'); -?> -``` - -or: - -```php -loadHTML('Ook!

Ook!

Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.

Eek!

'); -?> -``` - -## Comparison with `masterminds/html5` ## - -This library and [masterminds/html5](https://packagist.org/packages/masterminds/html5) serve similar purposes. Generally, we are more accurate, but they are much faster. The following table summarizes the main functional differences. - -| | DOMDocument | Masterminds | MensBeam | -|-----------------------------------------------------|---------------------------------------|----------------------------------------------------------|----------------------------------------| -| Minimum PHP version | 5.0 | 5.3 | 7.1 | -| Extensions required | dom | dom, ctype, mbstring or iconv | dom | -| Target HTML version | HTML 4.01 | HTML 5.0 | WHATWG Living Standard | -| Supported encodings | System-dependent | System-dependent | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#character-encodings) | -| Encoding detection | BOM, http-equiv | None | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding) (Steps 1-5 & 9) | -| Fallback encoding | ISO 8859-1 | UTF-8, configurable | Windows-1252, configurable | -| Handling of invalid characters | Bytes are passed through | Characters are dropped | [Per specification](https://encoding.spec.whatwg.org/#concept-encoding-process) | -| Handling of invalid XML element names | Variable | Name is changed to "invalid" | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#coercing-an-html-dom-into-an-infoset) | -| Handling of invalid XML attribute names | Variable | Attribute is dropped | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#coercing-an-html-dom-into-an-infoset) | -| Handling of misnested tags | Parent end tags always close children | Parent end tags always close children | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser) | -| Handling of data between table cells | 
Left as-is | Left as-is | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser) | -| Handling of omitted start tags | Elements are not inserted | Elements are not inserted | Per specification | -| Handling of processing instructions | Processing instructions are retained | Processing instructions are retained | Per specification | -| Handling of bogus XLink namespace\* | Foreign content not supported | XLink attributes are lost if preceded by bogus namespace | Bogus namespace is ignored | -| Namespace for HTML elements | Null | Per specification, configurable | Null | -| Time needed to parse single-page HTML specification | 0.5 seconds | 2.7 seconds† | 6.0 seconds‡ | -| Peak memory needed for same | 11.6 MB | 38 MB | 13.9 MB | - -\* For example: ``. It is unclear what correct behaviour is, but we believe our behaviour to be more consistent with the intent of the specification. - -† With HTML namespace disabled. With HTML namespace enabled it does not finish in a reasonable time due to a PHP bug. - -‡ With parse errors suppressed. Reporting parse errors adds approximately 10% overhead. \ No newline at end of file +Modern DOM library written in PHP for HTML documents. 
\ No newline at end of file diff --git a/RoboFile.php b/RoboFile.php index 5b3b612..8467a16 100644 --- a/RoboFile.php +++ b/RoboFile.php @@ -21,47 +21,6 @@ function norm(string $path): string { } class RoboFile extends \Robo\Tasks { - /** Generates static manual pages in the "manual" directory - * - * The resultant files are suitable for offline viewing and inclusion into release builds - */ - public function manual(array $args): Result { - $execpath = escapeshellarg(norm(BASE."vendor/bin/daux")); - $t = $this->collectionBuilder(); - $t->taskExec($execpath)->arg("generate")->option("-d", BASE."manual")->args($args); - return $t->run(); - } - - /** Serves a live view of the manual using the built-in Web server */ - public function manualLive(array $args): Result { - $execpath = escapeshellarg(norm(BASE."vendor/bin/daux")); - return $this->taskExec($execpath)->arg("serve")->args($args)->run(); - } - - /** Rebuilds the entire manual theme - * - * This requires Node and Yarn to be installed, and only needs to be done when - * Daux's theme changes - */ - public function manualTheme(array $args): Result { - $postcss = escapeshellarg(norm(BASE."node_modules/.bin/postcss")); - $themesrc = norm(BASE."docs/theme/src/").\DIRECTORY_SEPARATOR; - $themeout = norm(BASE."docs/theme/php/").\DIRECTORY_SEPARATOR; - $dauxjs = norm(BASE."vendor/daux/vendor/daux/daux.io/themes/daux/js/").\DIRECTORY_SEPARATOR; - // start a collection; this stops after the first failure - $t = $this->collectionBuilder(); - // install dependencies via Yarn - $t->taskExec("yarn install"); - // compile the stylesheet - $t->taskExec($postcss)->arg($themesrc."php.scss")->option("-o", $themeout."php.css"); - // copy JavaScript files from the Daux theme - foreach (glob($dauxjs."daux*.js") as $file) { - $t->taskFilesystemStack()->copy($file, $themeout.basename($file), true); - } - // execute the collection - return $t->run(); - } - /** Runs the typical test suite * * Arguments passed to the task are passed on to 
PHPUnit. Thus one may, for @@ -204,7 +163,7 @@ class RoboFile extends \Robo\Tasks { $template = <<<'FILE' =7.1", - "ext-dom": "*", - "mensbeam/intl": ">=0.9.0", - "mensbeam/mimesniff": "^0.2.0" - }, - "suggest": { - "ext-ctype": "Improved performance" + "ext-dom": "*" }, "scripts": { "post-install-cmd": ["@composer bin all install"], @@ -30,24 +25,20 @@ ], "autoload": { "psr-4": { - "MensBeam\\HTML\\": [ + "MensBeam\\HTML\\DOM\\": [ "lib/", - "lib/DOM", - "lib/DOM/traits" + "lib/traits" ] - }, - "classmap": ["lib/Token.php"], - "files": ["lib/ctype.php"] + } }, "autoload-dev": { "psr-4": { - "MensBeam\\HTML\\Test\\": "tests/lib/", - "MensBeam\\HTML\\TestCase\\": "tests/cases/" + "MensBeam\\HTML\\DOM\\Test\\": "tests/lib/", + "MensBeam\\HTML\\DOM\\TestCase\\": "tests/cases/" } }, "require-dev": { "bamarni/composer-bin-plugin": "^1.3", - "masterminds/html5": "^2.7", "daux/daux.io": "^0.16.0" } } diff --git a/composer.lock b/composer.lock index fae05e0..de8f061 100644 --- a/composer.lock +++ b/composer.lock @@ -4,168 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "713eb048e9e334071336deca47de7e87", - "packages": [ - { - "name": "mensbeam/intl", - "version": "0.9.0", - "source": { - "type": "git", - "url": "https://github.com/mensbeam/intl.git", - "reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/mensbeam/intl/zipball/de037b182ce99aaa90ebc09b0ee0457ddf1d07bc", - "reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc", - "shasum": "" - }, - "require": { - "php": ">=7.1" - }, - "require-dev": { - "bamarni/composer-bin-plugin": "*", - "ext-intl": "*" - }, - "type": "library", - "autoload": { - "psr-4": { - "MensBeam\\Intl\\": "lib/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "J. 
King", - "email": "jking@jkingweb.ca", - "homepage": "https://jkingweb.ca/" - } - ], - "description": "A set of dependency-free basic internationalization tools", - "keywords": [ - "WHATWG", - "charset", - "encoding", - "internationalization", - "intl", - "unicode", - "utf-8", - "utf8" - ], - "support": { - "issues": "https://github.com/mensbeam/intl/issues", - "source": "https://github.com/mensbeam/intl/tree/0.9.0" - }, - "time": "2021-03-25T19:08:04+00:00" - }, - { - "name": "mensbeam/mimesniff", - "version": "0.2.1", - "source": { - "type": "git", - "url": "https://github.com/mensbeam/mime.git", - "reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/mensbeam/mime/zipball/c19be2496ab1e27fbf9c3483c2a9faa2781796cd", - "reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd", - "shasum": "" - }, - "require": { - "php": ">=7.1", - "psr/http-message": "^1.0" - }, - "require-dev": { - "bamarni/composer-bin-plugin": "^1.3", - "ext-intl": "*" - }, - "type": "library", - "autoload": { - "psr-4": { - "MensBeam\\Mime\\": "lib/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "J. 
King", - "email": "jking@jkingweb.ca", - "homepage": "https://jkingweb.ca/" - } - ], - "description": "An implementation of the WHATWG MIME Sniffing specification", - "keywords": [ - "WHATWG", - "mime", - "mimesniff" - ], - "support": { - "issues": "https://github.com/mensbeam/mime/issues", - "source": "https://github.com/mensbeam/mime/tree/0.2.1" - }, - "time": "2021-03-07T03:58:00+00:00" - }, - { - "name": "psr/http-message", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/php-fig/http-message.git", - "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", - "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", - "shasum": "" - }, - "require": { - "php": ">=5.3.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "psr-4": { - "Psr\\Http\\Message\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "PHP-FIG", - "homepage": "http://www.php-fig.org/" - } - ], - "description": "Common interface for HTTP messages", - "homepage": "https://github.com/php-fig/http-message", - "keywords": [ - "http", - "http-message", - "psr", - "psr-7", - "request", - "response" - ], - "support": { - "source": "https://github.com/php-fig/http-message/tree/master" - }, - "time": "2016-08-06T14:39:51+00:00" - } - ], + "content-hash": "0e733e74b1b163aa4cd80329ff9c71d0", + "packages": [], "packages-dev": [ { "name": "bamarni/composer-bin-plugin", @@ -694,75 +534,6 @@ }, "time": "2020-12-25T05:00:37+00:00" }, - { - "name": "masterminds/html5", - "version": "2.7.5", - "source": { - "type": "git", - "url": "https://github.com/Masterminds/html5-php.git", - "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab" - }, - "dist": { - "type": "zip", - "url": 
"https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab", - "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab", - "shasum": "" - }, - "require": { - "ext-ctype": "*", - "ext-dom": "*", - "ext-libxml": "*", - "php": ">=5.3.0" - }, - "require-dev": { - "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.7-dev" - } - }, - "autoload": { - "psr-4": { - "Masterminds\\": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Matt Butcher", - "email": "technosophos@gmail.com" - }, - { - "name": "Matt Farina", - "email": "matt@mattfarina.com" - }, - { - "name": "Asmir Mustafic", - "email": "goetas@gmail.com" - } - ], - "description": "An HTML5 parser and serializer.", - "homepage": "http://masterminds.github.io/html5-php", - "keywords": [ - "HTML5", - "dom", - "html", - "parser", - "querypath", - "serializer", - "xml" - ], - "support": { - "issues": "https://github.com/Masterminds/html5-php/issues", - "source": "https://github.com/Masterminds/html5-php/tree/2.7.5" - }, - "time": "2021-07-01T14:25:37+00:00" - }, { "name": "psr/container", "version": "1.1.1", @@ -918,6 +689,59 @@ }, "time": "2019-04-30T12:38:16+00:00" }, + { + "name": "psr/http-message", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-message.git", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Message\\": "src/" + } + }, + "notification-url": 
"https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for HTTP messages", + "homepage": "https://github.com/php-fig/http-message", + "keywords": [ + "http", + "http-message", + "psr", + "psr-7", + "request", + "response" + ], + "support": { + "source": "https://github.com/php-fig/http-message/tree/master" + }, + "time": "2016-08-06T14:39:51+00:00" + }, { "name": "ralouphie/getallheaders", "version": "3.0.3", diff --git a/docs/config.json b/docs/config.json deleted file mode 100644 index 567368b..0000000 --- a/docs/config.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "title": "HTML", - "tagline": "Tools for parsing and printing HTML5 documents and fragments.", - "author": "Dustin Wilson", - "languages": { - "en": "English" - }, - "themes_directory": "docs/theme", - "html": { - "theme":"php", - "float": false, - "toggle_code": false, - "search": false - } -} diff --git a/docs/en/010_About.md b/docs/en/010_About.md deleted file mode 100644 index 57792e0..0000000 --- a/docs/en/010_About.md +++ /dev/null @@ -1 +0,0 @@ -HTML is a library which provides tools for parsing and printing of HTML5 documents and fragments. Unlike PHP's DOM and other similar libraries the goal of the project is to parse HTML as accurate to the specification as possible given the limitations of PHP's DOM and of the uses of the library. Therefore, there is no scripting in this implementation, and there likely never will be. \ No newline at end of file diff --git a/docs/en/020_Installation.md b/docs/en/020_Installation.md deleted file mode 100644 index a566886..0000000 --- a/docs/en/020_Installation.md +++ /dev/null @@ -1,8 +0,0 @@ -We try to make the installation of the MensBeam HTML library as easy and straightforward as possible. - -## Requirements ## - -HTML intentionally has few requirements. 
It only requires PHP 7.1.0 or later with the [dom](http://php.net/manual/en/book.dom.php) extension installed. It is recommended to install the [ctype](https://www.php.net/manual/en/book.ctype.php) extension for performance improvements, but it is not required. - -TODO: Add Installation instructions once there are releases and a package is available on Packagist. - diff --git a/docs/en/030_Document_Object_Model/010_Comment.md b/docs/en/030_Document_Object_Model/010_Comment.md deleted file mode 100644 index 489c3ef..0000000 --- a/docs/en/030_Document_Object_Model/010_Comment.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Comment ---- - -# The Comment Class # - -## Introduction ## - -

Info Only new methods and methods which make outward-facing changes from \DOMComment will be documented here, otherwise they will be linked back to PHP's documentation.

- -## Class Synopsis ## - -
MensBeam\HTML\Comment extends \DOMComment {
-
-    use LeafNode, Moonwalk;
-
-    /* Inherited properties */
-    public string $data ;
-    public readonly int $length ;
-    public readonly string $nodeName ;
-    public string $nodeValue ;
-    public readonly int $nodeType ;
-    public readonly \DOMNode|null $parentNode ;
-    public readonly \DOMNodeList $childNodes ;
-    public readonly \DOMNode|null $firstChild ;
-    public readonly \DOMNode|null $lastChild ;
-    public readonly \DOMNode|null $previousSibling ;
-    public readonly \DOMNode|null $nextSibling ;
-    public readonly \DOMNamedNodeMap|null $attributes ;
-    public readonly Document|null $ownerDocument ;
-    public readonly string|null $namespaceURI ;
-    public string $prefix ;
-    public readonly string $localName ;
-    public readonly string|null $baseURI ;
-    public string $textContent ;
-
-    /* Trait Methods */
-    public LeafNode::appendChild ( \DOMNode $node ) : DOMException;
-    public Node::C14N ( bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
-    public Node::C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
-    public LeafNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : DOMException
-    public Moonwalk::moonwalk ( \Closure|null $filter = null ) : \Generator
-    public LeafNode::removeChild ( \DOMNode $child ) : DOMException
-    public LeafNode::replaceChild ( \DOMNode $node , \DOMNode $child ) : DOMException
-
-    /* Magic Methods */
-    public __toString() : string
-
-    /* Inherited Methods */
-    public __construct ( string $data = "" )
-    public \DOMNode::cloneNode ( bool $deep = false ) : \DOMNode|false
-    public \DOMNode::getLineNo ( ) : int
-    public \DOMNode::getNodePath ( ) : string|null
-    public \DOMNode::hasAttributes ( ) : bool
-    public \DOMNode::hasChildNodes ( ) : bool
-    public \DOMNode::isDefaultNamespace ( string $namespace ) : bool
-    public \DOMNode::isSameNode ( \DOMNode $otherNode ) : bool
-    public \DOMNode::isSupported ( string $feature , string $version ) : bool
-    public \DOMNode::lookupNamespaceUri ( string $prefix ) : string
-    public \DOMNode::lookupPrefix ( string $namespace ) : string|null
-    public \DOMNode::normalize ( ) : void
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/010_construct.md b/docs/en/030_Document_Object_Model/010_Document/010_construct.md deleted file mode 100644 index 4791f2e..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/010_construct.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Document::__construct ---- - -Document::__construct — Creates a new Document object - -## Description ## - -```php -public Document::__construct ( ) -``` - -Creates a new Document object. - -## Examples ## - -**Example \#1 Creating a new Document** - -```php - -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_createEntityReference.md b/docs/en/030_Document_Object_Model/010_Document/020_createEntityReference.md deleted file mode 100644 index ecb8b16..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_createEntityReference.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Document::createEntityReference ---- - -Document::createEntityReference — **DISABLED** - -## Description ## - -```php -public Document::createEntityReference ( string $name ) : false -``` - -This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). DOM4 does not have entity references or entity nodes. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_load.md b/docs/en/030_Document_Object_Model/010_Document/020_load.md deleted file mode 100644 index 619f745..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_load.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Document::load ---- - -Document::load — Load HTML from a file - -## Description ## - -```php -public Document::load ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool -``` - -Loads an HTML document from a file. - -## Parameters ## - -
-
filename
-
The path to the HTML document.
- -
options
-
Always null. Was used for option constants in \DOMDocument.
- -
encodingOrContentType
-
The encoding of the document that is being loaded. If not specified it will be determined automatically.
-
- -## Return Values ## - -Returns true on success or false on failure. - -## Examples ## - -**Example \#1 Creating a Document** - -```php -load('ook.html'); -echo $dom; - -?> -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_loadHTML.md b/docs/en/030_Document_Object_Model/010_Document/020_loadHTML.md deleted file mode 100644 index 03bb904..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_loadHTML.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Document::loadHTML ---- - -Document::loadHTML — Load HTML from a string - -## Description ## - -```php -public Document::loadHTML ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool -``` - -The function parses the HTML contained in the string source. - -## Parameters ## - -
-
source
-
The HTML string.
- -
options
-
Always null. Was used for option constants in \DOMDocument.
- -
encodingOrContentType
-
The encoding of the document that is being loaded. If not specified it will be determined automatically.
-
- -## Return Values ## - -Returns true on success or false on failure. - -## Examples ## - -**Example \#1 Creating a Document** - -```php -loadHTML('Ook!

Eek

'); -echo $dom; - -?> -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_loadHTMLFile.md b/docs/en/030_Document_Object_Model/010_Document/020_loadHTMLFile.md deleted file mode 100644 index 28f9a3f..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_loadHTMLFile.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Document::loadHTMLFile ---- - -Document::loadHTMLFile — Alias of Document::load() - -## Description ## - -This function is an alias of Document::load(). \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_loadXML.md b/docs/en/030_Document_Object_Model/010_Document/020_loadXML.md deleted file mode 100644 index ec63b89..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_loadXML.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Document::loadXML ---- - -Document::loadXML — **DISABLED** - -## Description ## - -```php -public Document::loadXML ( string $source , null $options = null ) : false -``` - -This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_save.md b/docs/en/030_Document_Object_Model/010_Document/020_save.md deleted file mode 100644 index f72bd03..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_save.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Document::save ---- - -Document::save — Serializes the DOM tree into a file - -## Description ## - -```php -public Document::save ( string $filename , null $options = null ) : int|false -``` - -Creates an HTML document from the DOM representation. - -## Parameters ## - -
-
filename
-
The path to the saved HTML document
- -
options
-
Always null. Was used for option constants in \DOMDocument.
-
- -## Return Values ## - -Returns the number of bytes written or false on failure. - -## Examples ## - -**Example \#1 Saving a DOM tree into a file** - -```php -loadHTML('Ook!

Eek

'); -echo 'Wrote: ' . $dom->save('/tmp/test.html') . ' bytes'; // Wrote: 85 bytes - -?> -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_saveHTMLFile.md b/docs/en/030_Document_Object_Model/010_Document/020_saveHTMLFile.md deleted file mode 100644 index 9aa0a5e..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_saveHTMLFile.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Document::saveHTMLFile ---- - -Document::saveHTMLFile — Alias of Document::save() - -## Description ## - -This function is an alias of Document::save(). \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_saveXML.md b/docs/en/030_Document_Object_Model/010_Document/020_saveXML.md deleted file mode 100644 index 1267302..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_saveXML.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Document::saveXML ---- - -Document::saveXML — **DISABLED** - -## Description ## - -```php -public Document::saveXML ( DOMNode|null $node = null , null $options = null ) : false -``` - -This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_validate.md b/docs/en/030_Document_Object_Model/010_Document/020_validate.md deleted file mode 100644 index da7b908..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_validate.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Document::validate ---- - -Document::validate — **DISABLED** - -## Description ## - -```php -public Document::validate ( ) : true -``` - -This function has been disabled and will always return `true`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). 
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/020_xinclude.md b/docs/en/030_Document_Object_Model/010_Document/020_xinclude.md deleted file mode 100644 index 523bf74..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/020_xinclude.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Document::xinclude ---- - -Document::xinclude — **DISABLED** - -## Description ## - -```php -public Document::xinclude ( null $options = null ) : false -``` - -This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Document/index.md b/docs/en/030_Document_Object_Model/010_Document/index.md deleted file mode 100644 index e5f0c69..0000000 --- a/docs/en/030_Document_Object_Model/010_Document/index.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -title: Document ---- - -# The Document Class # - -## Introduction ## - -Represents an entire HTML document; serves as the root of the document tree. Unlike the PHP [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php) class in which it inherits from it cannot be used to represent an XML document. It is strictly used to represent HTML. - -

Note: Only new methods and methods which make outward-facing changes from \DOMDocument will be documented here, otherwise they will be linked back to PHP's documentation.

- -## Class Synopsis ## - -
MensBeam\HTML\Document extends \DOMDocument {
-
-    use ContainerNode, Walk;
-
-    /* Constants */
-    public const NO_QUIRKS_MODE = 0 ;
-    public const QUIRKS_MODE = 1 ;
-    public const LIMITED_QUIRKS_MODE = 2 ;
-
-    /* Properties */
-    public Element|null $body = null ;
-    public string|null $documentEncoding = null ;
-    public int $quirksMode = 0 ;
-
-    /* Inherited properties */
-    public readonly \DOMNamedNodeMap|null $attributes ;
-    public readonly string|null $baseURI ;
-    public readonly \DOMNodeList $childNodes ;
-    public readonly DocumentType $doctype ;
-    public readonly Element $documentElement ;
-    public string|null $documentURI ;
-    public readonly \DOMNode|null $firstChild ;
-    public readonly \DOMImplementation $implementation ;
-    public readonly \DOMNode|null $lastChild ;
-    public readonly string $localName ;
-    public readonly string|null $namespaceURI ;
-    public readonly \DOMNode|null $nextSibling ;
-    public readonly string $nodeName ;
-    public string $nodeValue ;
-    public readonly int $nodeType ;
-    public readonly Document|null $ownerDocument ;
-    public readonly \DOMNode|null $parentNode ;
-    public string $prefix ;
-    public readonly \DOMNode|null $previousSibling ;
-    public string $textContent ;
-
-    /* Methods */
-    public __construct ( )
-    public createEntityReference ( string $name ) : false
-    public load ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
-    public loadHTML ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool
-    public loadHTMLFile ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
-    public loadXML ( string $source , null $options = null ) : false
-    public save ( string $filename , null $options = null ) : int|false
-    public saveHTMLFile ( string $filename , null $options = null ) : int|false
-    public saveXML ( \DOMNode|null $node = null , null $options = null ) : false
-    public validate ( ) : true
-    public xinclude ( null $options = null ) : false
-
-    /* Trait Methods */
-    public ContainerNode::appendChild ( \DOMNode $node ) : \DOMNode|false
-    public Node::C14N ( bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
-    public Node::C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
-    public ContainerNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : \DOMNode|false
-    public Walk::walk ( \Closure|null $filter = null ) : \Generator
-
-    /* Magic Methods */
-    public __toString() : string
-
-    /* Inherited methods */
-    public \DOMNode::cloneNode ( bool $deep = false ) : \DOMNode|false
-    public \DOMDocument::createAttribute ( string $localName ) : \DOMAttr|false
-    public \DOMDocument::createAttributeNS ( string|null $namespace , string $qualifiedName ) : \DOMAttr|false
-    public \DOMDocument::createCDATASection ( string $data ) : \DOMCdataSection|false
-    public \DOMDocument::createComment ( string $data ) : Comment|false
-    public \DOMDocument::createDocumentFragment ( ) : DocumentFragment|false
-    public \DOMDocument::createElement ( string $localName , string $value = "" ) : Element|false
-    public \DOMDocument::createElementNS ( string|null $namespace , string $qualifiedName , string $value = "" ) : Element|false
-    public \DOMDocument::createProcessingInstruction ( string $target , string $data = "" ) : ProcessingInstruction|false
-    public \DOMDocument::createTextNode ( string $data ) : Text|false
-    public \DOMDocument::getElementById ( string $elementId ) : Element|null
-    public \DOMDocument:getElementsByTagName ( string $qualifiedName ) : \DOMNodeList
-    public getElementsByTagNameNS ( string $namespace , string $localName ) : \DOMNodeList
-    public \DOMNode::getLineNo ( ) : int
-    public \DOMNode::getNodePath ( ) : string|null
-    public \DOMNode::hasAttributes ( ) : bool
-    public \DOMNode::hasChildNodes ( ) : bool
-    public \DOMDocument::importNode ( \DOMNode $node , bool $deep = false ) : \DOMNode|false
-    public \DOMNode::isDefaultNamespace ( string $namespace ) : bool
-    public \DOMNode::isSameNode ( \DOMNode $otherNode ) : bool
-    public \DOMNode::isSupported ( string $feature , string $version ) : bool
-    public \DOMNode::lookupNamespaceUri ( string $prefix ) : string
-    public \DOMNode::lookupPrefix ( string $namespace ) : string|null
-    public \DOMNode::normalize ( ) : void
-    public \DOMDocument::normalizeDocument ( ) : void
-    public \DOMDocument::registerNodeClass ( string $baseClass , string|null $extendedClass ) : bool
-    public \DOMDocument::relaxNGValidate ( string $filename ) : bool
-    public \DOMDocument::relaxNGValidateSource ( string $source ) : bool
-    public \DOMNode::removeChild ( \DOMNode $child ) : \DOMNode|false
-    public \DOMNode::replaceChild ( \DOMNode $node , \DOMNode $child ) : \DOMNode|false
-    public \DOMDocument::saveHTML ( \DOMNode|null $node = null ) : string|false
-    public \DOMDocument::schemaValidate ( string $filename , int $flags = 0 ) : bool
-    public \DOMDocument::schemaValidateSource ( string $source , int $flags = 0 ) : bool
-}
- -## Constants ## - -| Constant | Value | Description | -| ----------------------------------------------------- | ----- | ------------------------------------- | -| MensBeam\HTML\Document::NO_QUIRKS_MODE | 0 | Document not in quirks mode | -| MensBeam\HTML\Document::QUIRKS_MODE | 1 | Document is in quirks mode | -| MensBeam\HTML\Document::LIMITEDQUIRKS_MODE | 2 | Document is in limited quirks mode | - -## Properties ## - -
-
body
-
Represents the body or frameset node of the current document, or null if no such element exists.
- -
documentEncoding
-
Encoding of the document, as specified when parsing or when determining encoding type. Use this instead of \DOMDocument::encoding.
- -
quirksMode
-
Used when parsing. Specifies which mode the document was parsed in. One of the predefined quirks mode constants.
-
- -The following properties inherited from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php) have no effect in `Mensbeam\HTML\Document`, so therefore are not listed in the schema above: - -* actualEncoding -* config -* encoding -* formatOutput -* preserveWhiteSpace -* recover -* resolveExternals -* standalone -* strictErrorChecking -* substituteEntities -* validateOnParse -* version -* xmlEncoding -* xmlStandalone -* xmlVersion \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Element/010_getAttribute.md b/docs/en/030_Document_Object_Model/010_Element/010_getAttribute.md deleted file mode 100644 index 2d2fb64..0000000 --- a/docs/en/030_Document_Object_Model/010_Element/010_getAttribute.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Element::getAttribute ---- - -Element::getAttribute — Returns value of attribute - -## Description ## - -```php -public Element::getAttribute ( string $qualifiedName ) : string|null -``` - -Gets the value of the attribute with name `qualifiedName` for the current node. - -## Parameters ## - -
-
qualifiedName
-
The name of the attribute.
-
- -## Return Values ## - -Returns a string on success or null if no attribute with the given `qualifiedName` is found. `\DOMElement::getAttribute` returns an empty string on failure which is incorrect in newer versions of the DOM. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Element/010_getAttributeNS.md b/docs/en/030_Document_Object_Model/010_Element/010_getAttributeNS.md deleted file mode 100644 index 4bba473..0000000 --- a/docs/en/030_Document_Object_Model/010_Element/010_getAttributeNS.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Element::getAttributeNS ---- - -Element::getAttributeNS — Returns value of attribute - -## Description ## - -```php -public Element::getAttribute ( string|null $namespace , string $localName ) : string|null -``` - -Gets the value of the attribute in namespace `namespace` with local name `localName` for the current node. - -## Parameters ## - -
-
namespace
-
The namespace URI.
-
localName
-
The local name of the attribute.
-
- -## Return Values ## - -Returns a string on success or null if no attribute with the given `localName` and `namespace` is found. `\DOMElement::getAttribute` returns an empty string on failure which is incorrect in newer versions of the DOM. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/010_Element/index.md b/docs/en/030_Document_Object_Model/010_Element/index.md deleted file mode 100644 index 8111dcc..0000000 --- a/docs/en/030_Document_Object_Model/010_Element/index.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Element ---- - -# The Element Class # - -## Introduction ## - -

Note: Only new methods and methods which make outward-facing changes from \DOMElement are documented here; all other methods are linked back to PHP's documentation.

- -## Class Synopsis ## - -
MensBeam\HTML\Element extends \DOMElement {
-
-    use ContainerNode, Moonwalk, Walk;
-
-    /* Properties */
-    public readonly NodeList|null $classList ;
-    public string $innerHTML ;
-    public string $outerHTML ;
-
-    /* Inherited properties */
-    public readonly string $nodeName ;
-    public string $nodeValue ;
-    public readonly int $nodeType ;
-    public readonly \DOMNode|null $parentNode ;
-    public readonly \DOMNodeList $childNodes ;
-    public readonly \DOMNode|null $firstChild ;
-    public readonly \DOMNode|null $lastChild ;
-    public readonly \DOMNode|null $previousSibling ;
-    public readonly \DOMNode|null $nextSibling ;
-    public readonly \DOMNamedNodeMap|null $attributes ;
-    public readonly Document|null $ownerDocument ;
-    public readonly string|null $namespaceURI ;
-    public string $prefix ;
-    public readonly string $localName ;
-    public readonly string|null $baseURI ;
-    public string $textContent ;
-
-    /* Methods */
-    public getAttribute ( string $qualifiedName ) : string|null
-    public getAttributeNS ( string|null $namespace , string $localName ) : string|null
-
-    /* Trait Methods */
-    public ContainerNode::appendChild ( \DOMNode $node ) : \DOMNode|false
-    public Node::C14N ( bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
-    public Node::C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
-    public ContainerNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : \DOMNode|false
-    public Moonwalk::moonwalk ( \Closure|null $filter = null ) : \Generator
-    public Walk::walk ( \Closure|null $filter = null ) : \Generator
-
-    /* Magic Methods */
-    public __toString() : string
-
-    /* Inherited Methods */
-    public __construct ( string $qualifiedName , string|null $value = null , string $namespace = "" )
-    public \DOMNode::cloneNode ( bool $deep = false ) : \DOMNode|false
-    public \DOMElement::getAttributeNode ( string $qualifiedName ) :  \DOMAttr|false
-    public \DOMElement::getAttributeNodeNS ( string|null $namespace , string $localName ) :  \DOMAttr|null
-    public \DOMElement::getElementsByTagName ( string $qualifiedName ) :  \DOMNodeList
-    public \DOMElement::getElementsByTagNameNS ( string $namespace , string $localName ) : \DOMNodeList
-    public \DOMNode::getLineNo ( ) : int
-    public \DOMNode::getNodePath ( ) : string|null
-    public \DOMElement::hasAttribute ( string $qualifiedName ) : bool
-    public \DOMElement::hasAttributeNS ( string|null $namespace , string $localName ) : bool
-    public \DOMNode::hasAttributes ( ) : bool
-    public \DOMNode::hasChildNodes ( ) : bool
-    public \DOMNode::isDefaultNamespace ( string $namespace ) : bool
-    public \DOMNode::isSameNode ( \DOMNode $otherNode ) : bool
-    public \DOMNode::isSupported ( string $feature , string $version ) : bool
-    public \DOMNode::lookupNamespaceUri ( string $prefix ) : string
-    public \DOMNode::lookupPrefix ( string $namespace ) : string|null
-    public \DOMNode::normalize ( ) : void
-    public \DOMElement::removeAttribute ( string $qualifiedName ) : bool
-    public \DOMElement::removeAttributeNode (  \DOMAttr $attr ) :  \DOMAttr|false
-    public \DOMElement::removeAttributeNS ( string|null $namespace , string $localName ) : void
-    public \DOMElement::setAttribute ( string $qualifiedName , string $value ) :  \DOMAttr|bool
-    public \DOMNode::removeChild ( \DOMNode $child ) : \DOMNode|false
-    public \DOMNode::replaceChild ( \DOMNode $node , \DOMNode $child ) : \DOMNode|false
-    public \DOMElement::setAttributeNode (  \DOMAttr $attr ) :  \DOMAttr|null|false
-    public \DOMElement::setAttributeNodeNS (  \DOMAttr $attr ) :  \DOMAttr|null|false
-    public \DOMElement::setAttributeNS ( string|null $namespace , string $qualifiedName , string $value ) : void
-    public \DOMElement::setIdAttribute ( string $qualifiedName , bool $isId ) : void
-    public \DOMElement::setIdAttributeNode (  \DOMAttr $attr , bool $isId ) : void
-    public \DOMElement::setIdAttributeNS ( string $namespace , string $qualifiedName , bool $isId ) : void
-
-}
- -## Properties ## - -
-
classList
-
A live TokenList collection of the class attributes of the element. This can then be used to manipulate the class list.
- -
innerHTML
-
Gets or sets the HTML or XML markup contained within the element.
- -
outerHTML
-
Gets the serialized HTML fragment describing the element including its descendants. It can also be set to replace the element with nodes parsed from the given string.
-
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/ContainerNode/010_appendChild.md b/docs/en/030_Document_Object_Model/ContainerNode/010_appendChild.md deleted file mode 100644 index c4cdf2f..0000000 --- a/docs/en/030_Document_Object_Model/ContainerNode/010_appendChild.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: ContainerNode::appendChild ---- - -ContainerNode::appendChild — Adds new child at the end of the children - -## Description ## - -```php -public ContainerNode::appendChild ( \DOMNode $node ) : \DOMNode|false -``` - -This function appends a child to an existing list of children or creates a new list of children. The child can be created with e.g. [`Document::createElement()`](https://www.php.net/manual/en/domdocument.createelement.php), [`Document::createTextNode()`](https://www.php.net/manual/en/domdocument.createtextnode.php) etc. or simply by using any other node. - -When using an existing node it will be moved. - -
-

Warning Only the following element types may be appended to any node using Node and subject to hierarchy restrictions depending on the type of node being appended to:

- - - -

Note that \DOMAttr is missing from this list.

-
- -## Parameters ## - -
-
node
-
The new node.
-
- -## Examples ## - -**Example \#1 Adding a child to the body** - -```php -loadHTML('Ook!'); - -$node = $dom->createElement('br'); -$dom->body->appendChild($node); - -?> -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/ContainerNode/010_insertBefore.md b/docs/en/030_Document_Object_Model/ContainerNode/010_insertBefore.md deleted file mode 100644 index be5445d..0000000 --- a/docs/en/030_Document_Object_Model/ContainerNode/010_insertBefore.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: ContainerNode::insertBefore ---- - -ContainerNode::insertBefore — Adds a new child before a reference node - -## Description ## - -```php -public ContainerNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : \DOMNode|false -``` - -This function inserts a new node right before the reference node. If you plan to do further modifications on the appended child you must use the returned node. - -When using an existing node it will be moved. - -
-

Warning Only the following element types may be appended to any node using Node and subject to hierarchy restrictions depending on the type of node being appended to:

- - - -

Note that \DOMAttr is missing from this list.

-
- -## Parameters ## - -
-
node
-
The new node.
- -
child
-
The reference node. If not supplied, node is appended to the children.
-
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/ContainerNode/index.md b/docs/en/030_Document_Object_Model/ContainerNode/index.md deleted file mode 100644 index ac288c9..0000000 --- a/docs/en/030_Document_Object_Model/ContainerNode/index.md +++ /dev/null @@ -1,14 +0,0 @@ -# The ContainerNode trait # - -## Introduction ## - -Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). This one implements improved DOM child insertion methods. - -
trait MensBeam\HTML\ContainerNode {
-
-    use Node;
-
-    public appendChild ( \DOMNode $node ) : \DOMNode|false
-    public insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : \DOMNode|false
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/LeafNode/010_appendChild.md b/docs/en/030_Document_Object_Model/LeafNode/010_appendChild.md deleted file mode 100644 index 67b4625..0000000 --- a/docs/en/030_Document_Object_Model/LeafNode/010_appendChild.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: LeafNode::appendChild ---- - -LeafNode::appendChild — **DISABLED** - -## Description ## - -```php -public LeafNode::appendChild ( \DOMNode $node ) : DOMException -``` - -Throws a `DOMException` upon use. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/LeafNode/010_insertBefore.md b/docs/en/030_Document_Object_Model/LeafNode/010_insertBefore.md deleted file mode 100644 index 8d0f802..0000000 --- a/docs/en/030_Document_Object_Model/LeafNode/010_insertBefore.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: LeafNode::insertBefore ---- - -LeafNode::insertBefore — **DISABLED** - -## Description ## - -```php -public LeafNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : DOMException -``` - -Throws a `DOMException` upon use. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/LeafNode/010_removeChild.md b/docs/en/030_Document_Object_Model/LeafNode/010_removeChild.md deleted file mode 100644 index 86fe255..0000000 --- a/docs/en/030_Document_Object_Model/LeafNode/010_removeChild.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: LeafNode::removeChild ---- - -LeafNode::removeChild — **DISABLED** - -## Description ## - -```php -public LeafNode::removeChild ( \DOMNode $node ) : DOMException -``` - -Throws a `DOMException` upon use. 
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/LeafNode/010_replaceChild.md b/docs/en/030_Document_Object_Model/LeafNode/010_replaceChild.md deleted file mode 100644 index 968b58e..0000000 --- a/docs/en/030_Document_Object_Model/LeafNode/010_replaceChild.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: LeafNode::replaceChild ---- - -LeafNode::replaceChild — **DISABLED** - -## Description ## - -```php -public LeafNode::replaceChild ( \DOMNode $node , \DOMNode $child ) : DOMException -``` - -Throws a `DOMException` upon use. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/LeafNode/index.md b/docs/en/030_Document_Object_Model/LeafNode/index.md deleted file mode 100644 index 1c84993..0000000 --- a/docs/en/030_Document_Object_Model/LeafNode/index.md +++ /dev/null @@ -1,16 +0,0 @@ -# The LeafNode trait # - -## Introduction ## - -Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). This one disables all DOM child insertion methods. - -
trait MensBeam\HTML\LeafNode {
-
-    use Node;
-    
-    public appendChild ( \DOMNode $node ) : DOMException
-    public insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : DOMException
-    public removeChild ( \DOMNode $child ) : DOMException
-    public replaceChild ( \DOMNode $node, \DOMNode $child ) : DOMException
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Moonwalk/010_moonwalk.md b/docs/en/030_Document_Object_Model/Moonwalk/010_moonwalk.md deleted file mode 100644 index 3896311..0000000 --- a/docs/en/030_Document_Object_Model/Moonwalk/010_moonwalk.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Moonwalk::moonwalk ---- - -Moonwalk::moonwalk — Output generator for walking up the DOM tree - -## Description ## - -
public Moonwalk::moonwalk ( \Closure|null $filter = null ) : \Generator
-
- -Non-standard. Creates a [`\Generator`](https://www.php.net/manual/en/class.generator.php) object for walking up the DOM tree. This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker). - -## Examples ## - -**Example \#1 Print name of all ancestors of the H1 element** - -```php -loadHTML('Ook!

Eek

'); -$h1 = $dom->getElementsByTagName('h1')->item(0); - -// All ancestors will be elements so there's no reason to have a filter. -$tree = $h1->moonwalk(); - -foreach ($tree as $t) { - echo "{$t->nodeName}\n"; -} - -?> -``` - -The above example will output something similar to: - -```php -body -html - -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Moonwalk/index.md b/docs/en/030_Document_Object_Model/Moonwalk/index.md deleted file mode 100644 index 43fd695..0000000 --- a/docs/en/030_Document_Object_Model/Moonwalk/index.md +++ /dev/null @@ -1,11 +0,0 @@ -# The Moonwalk trait # - -## Introduction ## - -Allows the extended PHP DOM classes to Moonwalk up the DOM via a [`\Generator`](https://www.php.net/manual/en/class.generator.php). This is in lieu of recreating the awful [DOM TreeMoonwalker API](https://developer.mozilla.org/en-US/docs/Web/API/TreeMoonwalker). - -
trait MensBeam\HTML\Moonwalk {
-
-    public moonwalk ( \Closure|null $filter = null ) : \Generator
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Node/010_C14N.md b/docs/en/030_Document_Object_Model/Node/010_C14N.md deleted file mode 100644 index 1228dda..0000000 --- a/docs/en/030_Document_Object_Model/Node/010_C14N.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Node::C14N ---- - -Node::C14N — **DISABLED** - -## Description ## - -```php -public Node::C14N ( bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false -``` - -This function has been disabled and will always return `false`. `\DOMNode::C14N` is an extremely slow and inefficient method to serialize DOM and never should be used. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Node/010_C14NFile.md b/docs/en/030_Document_Object_Model/Node/010_C14NFile.md deleted file mode 100644 index 0ab0f91..0000000 --- a/docs/en/030_Document_Object_Model/Node/010_C14NFile.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Node::C14NFile ---- - -Document::C14NFile — **DISABLED** - -## Description ## - -```php -public Node::C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false -``` - -This function has been disabled and will always return `false`. `\DOMNode::C14NFile` is an extremely slow and inefficient method to serialize DOM and never should be used. \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Node/index.md b/docs/en/030_Document_Object_Model/Node/index.md deleted file mode 100644 index 0489d67..0000000 --- a/docs/en/030_Document_Object_Model/Node/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# The Node trait # - -## Introduction ## - -Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). It is used to disable [C14N](C14N.html) and [C14NFile](C14NFile.html). - -
trait MensBeam\HTML\Node {
-
-    public C14N ( bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
-    public C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Walk/010_walk.md b/docs/en/030_Document_Object_Model/Walk/010_walk.md deleted file mode 100644 index ddcf37e..0000000 --- a/docs/en/030_Document_Object_Model/Walk/010_walk.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Walk::walk ---- - -Walk::walk — Output generator for walking down the DOM tree - -## Description ## - -
public Walk::walk ( \Closure|null $filter = null ) : \Generator
-
- -Non-standard. Creates a [`\Generator`](https://www.php.net/manual/en/class.generator.php) object for walking down the DOM tree. This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker). - -## Examples ## - -**Example \#1 Print name of every Element** - -```php -loadHTML('Ook!

Eek

'); -$tree = $dom->walk(function($node) { - return ($node instanceof Element); -}); - -foreach ($tree as $t) { - echo "{$t->nodeName}\n"; -} - -?> -``` - -The above example will output something similar to: - -```php -html -head -title -body -h1 - -``` \ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/Walk/index.md b/docs/en/030_Document_Object_Model/Walk/index.md deleted file mode 100644 index e9b7ec8..0000000 --- a/docs/en/030_Document_Object_Model/Walk/index.md +++ /dev/null @@ -1,11 +0,0 @@ -# The Walk trait # - -## Introduction ## - -Allows the extended PHP DOM classes to walk down the DOM via a [`\Generator`](https://www.php.net/manual/en/class.generator.php). This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker). - -
trait MensBeam\HTML\Walk {
-
-    public walk ( \Closure|null $filter = null ) : \Generator
-
-}
\ No newline at end of file diff --git a/docs/en/030_Document_Object_Model/index.md b/docs/en/030_Document_Object_Model/index.md deleted file mode 100644 index 12cac6f..0000000 --- a/docs/en/030_Document_Object_Model/index.md +++ /dev/null @@ -1 +0,0 @@ -The MensBeam HTML library works by parsing HTML strings into PHP's existing XML DOM. It, however, has to force the antiquated PHP DOM extension into working properly with modern HTML DOM by extending many of the node types. The documentation below follows PHP's doc style guide as closely as possible. Each class should be listed separately in the menu under this section. diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 509b64c..0000000 --- a/docs/index.md +++ /dev/null @@ -1 +0,0 @@ -Welcome to the user manual for HTML. It is included with each copy of the software, and is also [available online](https://mensbeam.com/html/en/). Please select a language above. diff --git a/docs/theme/php/config.json b/docs/theme/php/config.json deleted file mode 100644 index 15dd40d..0000000 --- a/docs/theme/php/config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "favicon": "favicon.png", - "js": [ - "daux.min.js" - ], - "css": [ - "php.css" - ] -} diff --git a/docs/theme/php/daux.min.js b/docs/theme/php/daux.min.js deleted file mode 100644 index fd87588..0000000 --- a/docs/theme/php/daux.min.js +++ /dev/null @@ -1,2 +0,0 @@ -var e=document.querySelectorAll(".s-content pre"),t=document.querySelector(".CodeToggler"),n="daux_code_blocks_hidden";function a(t){for(var a=0;a code:not(.hljs)");if(l.length){var i=document.getElementsByTagName("head")[0],c=document.createElement("script");c.type="text/javascript",c.async=!0,c.src="".concat(window.base_url,"daux_libraries/highlight.pack.js"),c.onload=function(e){[].forEach.call(l,window.hljs.highlightBlock)},i.appendChild(c)}function s(e){var t=void 0!==e.preventDefault;t&&e.preventDefault();var n=function(e){for(var 
t=e;(t=t.parentNode)&&9!==t.nodeType;)if(1===t.nodeType&&t.classList.contains("Nav__item"))return t;throw new Error("Could not find a NavItem...")}(e.target),a=n.querySelector("ul.Nav");t&&n.classList.contains("Nav__item--open")?(a.style.height="".concat(a.scrollHeight,"px"),a.style.transitionDuration="150ms",a.style.height="0px",n.classList.remove("Nav__item--open")):t?(a.style.transitionDuration="150ms",a.addEventListener("transitionend",(function e(t){"0px"!==t.target.style.height&&(t.target.style.height="auto"),t.target.removeEventListener("transitionend",e)})),a.style.height="".concat(a.scrollHeight,"px"),n.classList.add("Nav__item--open")):a.style.height="auto"}for(var d,u=document.querySelectorAll(".Nav__item.has-children i.Nav__arrow"),h=u.length-1;h>=0;h--)(d=u[h]).addEventListener("click",s),d.parentNode.parentNode.classList.contains("Nav__item--open")&&s({target:d});var g=document.querySelectorAll(".Nav__item__link--nopage"),v=!0,p=!1,_=void 0;try{for(var y,m=g[Symbol.iterator]();!(v=(y=m.next()).done);v=!0){y.value.addEventListener("click",s)}}catch(e){p=!0,_=e}finally{try{v||null==m.return||m.return()}finally{if(p)throw _}} -//# sourceMappingURL=daux.min.js.map diff --git a/docs/theme/php/php.css b/docs/theme/php/php.css deleted file mode 100644 index cffc18c..0000000 --- a/docs/theme/php/php.css +++ /dev/null @@ -1,2 +0,0 @@ -/*! 
normalize.css v4.1.1 | MIT License | github.com/necolas/normalize.css */ -html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%;font-size:14px}body{margin:0;padding:0}article,aside,details,figcaption,figure,footer,header,main,menu,nav,section,summary{display:block}audio,canvas,progress,video{display:inline-block}audio:not([controls]){display:none;height:0}progress,sub,sup{vertical-align:baseline}.s-content pre code:after,.s-content pre code:before,[hidden],template{display:none}a{background-color:transparent;-webkit-text-decoration-skip:objects;text-decoration:none;color:#369}a:active,a:hover{outline-width:0}abbr[title]{border-bottom:none;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:bolder}.s-content blockquote cite,dfn{font-style:italic}h1{font-size:2em;margin:.67em 0}mark{background-color:#ff0;color:#000}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}svg:not(:root){overflow:hidden}code,kbd,pre,samp{font-family:monospace,monospace;font-size:1em}figure{margin:1em 40px}hr{box-sizing:content-box;height:0;clear:both;margin:1em 0;border:0;border-top:1px solid #ddd}button,input,select,textarea{font:inherit;margin:0}optgroup{font-weight:700}button,hr,input{overflow:visible}button,select{text-transform:none}[type=reset],[type=submit],button,html [type=button]{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{border:1px solid silver;margin:0 2px;padding:.35em .625em 
.75em}legend{color:inherit;display:table;max-width:100%;white-space:normal}textarea{overflow:auto}[type=checkbox],[type=radio],legend{box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-cancel-button,[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-input-placeholder{color:inherit;opacity:.54}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}*,:after,:before{box-sizing:border-box}@media (min-width:850px){html{font-size:16px}}html{background-color:#fff;color:#333}body,html{height:100%}.Collapsible__trigger:hover .Collapsible__trigger__bar,.Columns__left{background-color:#333}.Columns__right__content{padding:10px}@media (max-width:768px){html:not(.no-js) .Collapsible__content{height:0;overflow:hidden;transition:height 400ms ease-in-out}}.Collapsible__trigger{margin:12px;padding:7px 10px;background-color:transparent;border:0;float:right;background-image:none;filter:none;box-shadow:none}.Collapsible__trigger__bar{display:block;width:18px;height:2px;margin-top:2px;margin-bottom:3px;background-color:#e8d5d3}.Collapsible__trigger:hover{background-color:#8892bf;box-shadow:none}@media screen and (min-width:769px){body{background-color:#15284b}.Navbar{position:fixed;z-index:1030;width:100%}.Collapsible__trigger{display:none!important}.Collapsible__content{display:block!important}.Columns{height:100%}.Columns:after,.Columns:before{content:" ";display:table}.Columns:after{clear:both}.Columns__left,.Columns__right{position:relative;min-height:1px;float:left;overflow:auto;height:100%}.Columns__left{width:25%;border-right:1px solid #e7e7e9;overflow-x:hidden}.Columns__right{width:75%}.Columns__right__content{padding:0 20px 20px;min-height:100%}}.Page{max-width:860px}.u-visuallyHidden{position:absolute!important;height:1px;width:1px;overflow:hidden;clip:rect(1px 1px 1px 
1px);clip:rect(1px,1px,1px,1px);white-space:nowrap}body{font-feature-settings:"kern" 1;-webkit-font-kerning:normal;font-kerning:normal;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;line-height:1.618;font-size:16px;color:#333!important}body,h1,h2,h3,h4,h5,h6{font-family:sans-serif}.s-content h1,.s-content h2,.s-content h3,.s-content h4,.s-content h5,.s-content h6{cursor:text;line-height:1.4em;margin:2em 0 .5em}.s-content h1 code,.s-content h1 tt,.s-content h2 code,.s-content h2 tt,.s-content h3 code,.s-content h3 tt,.s-content h4 code,.s-content h4 tt,.s-content h5 code,.s-content h5 tt,.s-content h6 code,.s-content h6 tt{font-size:inherit}.s-content h1 i,.s-content h2 i,.s-content h3 i,.s-content h4 i,.s-content h5 i,.s-content h6 i{font-size:.7em}.s-content h1,.s-content h1 p,.s-content h2 p,.s-content h3 p,.s-content h4 p,.s-content h5 p,.s-content h6 p{margin-top:0}.s-content small{font-size:1rem}.s-content a{text-decoration:underline}.s-content p{margin-bottom:1.3em}.s-content ol,.s-content ul{padding-left:2em}.s-content ul p,.s-content ul ul{margin:0}.s-content dl{padding:0}.s-content dl dt{font-weight:700;font-style:italic;padding:0;margin:15px 0 5px}.s-content dl dt:first-child{padding:0}.s-content dl dd{margin:0 0 15px;padding:0 15px}.s-content blockquote{margin:.75em 2em;padding:.5em 1em;font-style:italic;border-left:.25em solid #333}.s-content blockquote cite:before{content:"\2014";padding-right:.5em}.s-content table{width:100%;padding:0;margin-bottom:1em;border-collapse:separate;border-spacing:2px;border:2px solid #939393}.s-content table+table{margin-top:1em}.s-content table tr{background-color:#fff;margin:0;padding:0;border-top:0}.s-content table tr:nth-child(2n){background-color:transparent}.s-content table th{font-weight:700;background:#dbdbdb}.s-content table td,.s-content table th{margin:0;padding:.5em}.s-content blockquote>:first-child,.s-content dl dd>:first-child,.s-content dl dt>:first-child,.s-content 
ol>:first-child,.s-content table td>:first-child,.s-content table th>:first-child,.s-content ul>:first-child{margin-top:0}.admonition p:last-child,.s-content blockquote>:last-child,.s-content dl dd>:last-child,.s-content dl dt>:last-child,.s-content ol>:last-child,.s-content table td>:last-child,.s-content table th>:last-child,.s-content ul>:last-child{margin-bottom:0}.s-content img{max-width:100%;display:inline-block}.s-content code{font-family:"Operator Mono SSm","Operator Mono",monospace;padding-top:.1rem;padding-bottom:.1rem;background:0 0;border-radius:0;box-shadow:none;padding:0;border:0;margin:0}.s-content code:after,.s-content code:before{letter-spacing:-.2em;content:"\00a0"}.s-content pre{background:#f5f2f0;line-height:1.5em;overflow:auto;border:0;border-radius:0;padding:.75em 20px;margin:0 -20px 20px}.s-content pre code{margin:0;padding:0;white-space:pre;box-shadow:none}.s-content pre code,.s-content pre tt{background-color:transparent;border:0}.s-content ins,.s-content u{text-decoration:none;border-bottom:1px solid #333}.s-content del a,.s-content ins a,.s-content u a{color:inherit}a.Link--external:after{content:" " url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAVklEQVR4Xn3PgQkAMQhDUXfqTu7kTtkpd5RA8AInfArtQ2iRXFWT2QedAfttj2FsPIOE1eCOlEuoWWjgzYaB/IkeGOrxXhqB+uA9Bfcm0lAZuh+YIeAD+cAqSz4kCMUAAAAASUVORK5CYII=)}a.Link--broken{color:red}p{margin:0 0 1em}.Button{display:inline-block;text-align:center;vertical-align:middle;touch-action:manipulation;cursor:pointer;background-image:none;border:1px solid transparent;white-space:nowrap;margin-bottom:0}.Button--small{font-size:12px;line-height:1.5;border-radius:3px}.Button--default{color:#333;background-color:#fff;border-color:#ccc}.Button--default.Button--active{color:#333;background-color:#e6e6e6;border-color:#adadad}.Brand{display:block;background-color:#4f5b93;padding:.75em .6em;font-size:1.125rem;text-shadow:none;font-family:sans-serif;color:#fff}.Navbar{box-shadow:0 1px 5px 
rgba(0,0,0,.25);background-color:#e63c2f;margin-bottom:0}.CodeToggler{padding:0 20px}.CodeToggler__text{font-size:12px;line-height:1.5;padding:6px 10px 6px 0;display:inline-block;vertical-align:middle}.CodeToggler--hidden,.no-js .CodeToggler,.s-content code::after,.s-content code::before{display:none}.Nav{margin:0;padding:0}.Nav__arrow{display:inline-block;position:relative;width:16px;margin-left:-16px}.Nav__arrow:before{position:absolute;display:block;content:"";left:50%;border-right:.15em solid #333;border-top:.15em solid #333;transform:rotate(45deg);transition-duration:.3s}.Nav__item,.Nav__item a{display:block}.Nav__item a{margin:0;padding:6px 15px 6px 20px;font-family:sans-serif;font-weight:400;color:#f2f2f2;text-shadow:none}.Nav__item a:hover{color:#f2f2f2;text-shadow:none;background-color:#793862}.Nav .Nav{margin-left:15px}html:not(.no-js) .Nav .Nav{height:0;transition:height 400ms ease-in-out;overflow:hidden}.Nav .Nav .Nav__item a{margin:0 0 0 -15px;padding:3px 30px;font-family:sans-serif;color:#f2f2f2;opacity:.7}.HomepageButtons .Button--hero:hover,.Nav .Nav .Nav__item a:hover{opacity:1}.Nav .Nav .Nav__item--active a{color:#f2f2f2}.Nav__item--active>a,.Nav__item--open>a{background-color:#793862}.Nav__item--open>a>.Nav__arrow:before{margin-left:-.25em;transform:rotate(135deg)}.Page__header{margin:0 0 10px;padding:0}.Page__header:after,.Page__header:before{content:" ";display:table}.Page__header:after{clear:both}.Page__header h1{padding:0;line-height:57px;font-size:1rem;border-bottom:0;margin:0}.Page__header--separator{height:.6em}.Page__header a{text-decoration:none}.Page__header .EditOn,.Page__header .ModifiedDate{float:left;font-size:10px;color:gray}.Page__header .EditOn{float:right}.Links,.Twitter{padding:0 20px}.Links a{font-family:sans-serif;font-weight:400;color:#f2f2f2;line-height:2em}.Twitter{font:11px/18px "Helvetica 
Neue",Arial,sans-serif}.Twitter__button{text-decoration:none;display:inline-block;vertical-align:top;position:relative;height:20px;box-sizing:border-box;padding:1px 8px 1px 6px;background-color:#1b95e0;color:#fff;border-radius:3px;font-weight:500;cursor:pointer}.Twitter__button .Twitter__button__label{display:inline-block;vertical-align:top;margin-left:3px;white-space:nowrap}.Twitter__button svg{position:relative;top:2px;display:inline-block;width:14px;height:14px}.PoweredBy{padding:0 20px 1rem;font-size:1rem}.Search{position:relative}.Search__field{display:block;width:100%;height:34px;padding:6px 30px 6px 20px;color:#555;border-width:0 0 1px;border-bottom:1px solid #ccc;background:#fff;transition:border-color ease-in-out .15s}.Search__field:focus{border-color:#8892bf;outline:0}.Search__icon{position:absolute;right:9px;top:9px;width:16px;height:16px;cursor:pointer}.Navbar .Search{float:right;margin:8px 20px}.Navbar .Search__field{box-shadow:inset 0 1px 1px rgba(0,0,0,.075);border-width:0;border-radius:4px;padding-left:10px}.TableOfContentsContainer{float:right;min-width:300px;max-width:25%;padding-left:1em}.TableOfContentsContainer__title{margin-bottom:0!important}.TableOfContentsContainer__content{border:1px solid #efefef;border-width:4px 2px 2px 6px}.TableOfContentsContainer__content>.TableOfContents>li+li{border-top:1px solid #ddd}ul.TableOfContents{font-size:1rem;padding-left:0;margin:0;list-style-type:none}ul.TableOfContents p{margin-bottom:0}ul.TableOfContents a{text-decoration:none;display:block;padding:.2em 0 .2em .75em}ul.TableOfContents .TableOfContents{padding-left:.75em}.Pager{padding-left:0;margin:1em 0;list-style:none;text-align:center}.Pager:after,.Pager:before{content:" ";display:table}.Pager,.Pager:after{clear:both}.Pager li,pre .s-content code{display:inline}.Pager li>a{display:inline-block;padding:5px 14px;background-color:#fff}.Pager li>a:focus,.Pager 
li>a:hover{text-decoration:none}.Pager--next>a{float:right}.Pager--prev>a{float:left}.Checkbox{position:relative;display:block;padding-left:30px;cursor:pointer}.Checkbox input{position:absolute;z-index:-1;opacity:0}.Checkbox__indicator{position:absolute;top:50%;left:0;width:20px;height:20px;margin-top:-10px;background:#e6e6e6}.Checkbox__indicator:after{position:absolute;display:none;content:""}.Checkbox input:focus~.Checkbox__indicator,.Checkbox:hover input~.Checkbox__indicator{background:#ccc}.Checkbox input:checked~.Checkbox__indicator{background:#333}.Checkbox input:checked~.Checkbox__indicator:after{display:block}.Checkbox input:checked:focus~.Checkbox__indicator,.Checkbox:hover input:not([disabled]):checked~.Checkbox__indicator{background:#8892bf}.Checkbox input:disabled~.Checkbox__indicator{pointer-events:none;opacity:.6;background:#e6e6e6}.Checkbox .Checkbox__indicator:after{top:4px;left:8px;width:5px;height:10px;transform:rotate(45deg);border:solid #fff;border-width:0 2px 2px 0}.Checkbox input:disabled~.Checkbox__indicator:after{border-color:#7b7b7b}.Container{margin-right:auto;margin-left:auto}.Container--inner{width:80%;margin:0 auto}@media (min-width:1200px){.Container{width:1170px}}@media (min-width:992px){.Container{width:970px}}@media (min-width:769px){.Container{width:750px}}.Homepage{background-color:#fff;border-radius:0;border:0;color:#333;overflow:hidden;padding-bottom:0;margin-bottom:0;box-shadow:none}.HomepageTitle h2{width:80%;font-size:30px;margin:20px auto;text-align:center}.HomepageImage img{display:block;max-width:80%;margin:0 auto;height:auto}.HomepageButtons{padding:20px 0;background-color:#e8d5d3;text-align:center}.HomepageButtons:after,.HomepageButtons:before{content:" ";display:table}.HomepageButtons:after{clear:both}.HomepageButtons .Button--hero{padding:20px 30px;border-radius:0;text-shadow:none;opacity:.8;margin:0 10px;text-transform:uppercase;border:5px solid 
#333;font-family:sans-serif;background-image:none;filter:none;box-shadow:none}@media (max-width:768px){.HomepageButtons .Button--hero{display:block;margin-bottom:10px}}.HomepageButtons .Button--hero.Button--secondary{background-color:#793862;color:#333}.HomepageButtons .Button--hero.Button--primary{background-color:#333;color:#333}.HomepageContent{background-color:#fff;padding:40px 0}.HomepageContent ol li,.HomepageContent ul li{list-style:none;margin-bottom:.5em;position:relative}.HomepageContent ol li:before,.HomepageContent ul li:before{position:absolute;top:50%;left:-1.5em;content:"";width:0;height:0;border:.5em solid transparent;border-left:.5em solid #8892bf;float:left;display:block;margin-top:-.5em}.HomepageContent .HeroText{font-family:sans-serif;font-weight:300;font-size:16px;margin-bottom:20px;line-height:1.4}@media (min-width:769px){.HomepageContent{padding:40px 20px}.HomepageContent .HeroText{font-size:21px}.HomepageContent .Row{margin:0 -15px}.HomepageContent .Row__half,.HomepageContent .Row__quarter,.HomepageContent .Row__third{float:left;position:relative;min-height:1px;padding-left:15px;padding-right:15px}.HomepageContent .Row__third{width:33.333333%}.HomepageContent .Row__half{width:50%}.HomepageContent .Row__quarter{width:25%}}.HomepageFooter{background-color:#333;color:#8892bf;border:0;box-shadow:none}.HomepageFooter:after,.HomepageFooter:before{content:" ";display:table}.HomepageFooter:after{clear:both}@media (max-width:768px){.HomepageFooter{padding:0 20px;text-align:center}.HomepageFooter .HomepageFooter__links{padding-left:0;list-style-type:none}}@media (min-width:769px){.HomepageFooter .HomepageFooter__links{float:left}.HomepageFooter .HomepageFooter__twitter{float:right}}.HomepageFooter__links,.HomepageFooter__twitter{margin:40px 0}.HomepageFooter__links li a{line-height:32px;font-size:16px;font-family:sans-serif;font-weight:700}.HomepageFooter__links li a:hover{text-decoration:underline}.HomepageFooter 
.Twitter__button{margin-bottom:20px}@media print{*{text-shadow:none!important;color:#000!important;background:0 0!important;box-shadow:none!important}h1,h2,h3,h4,h5,h6{-moz-column-break-after:avoid;break-after:avoid;-moz-column-break-before:auto;break-before:auto}blockquote,img,pre{-moz-column-break-inside:avoid;break-inside:avoid}blockquote,pre{border:1px solid #999;font-style:italic}img{border:0}a,a:visited{text-decoration:underline}abbr[title]:after{content:" (" attr(title) ")"}q{quotes:none}.s-content a[href^="#"]:after,q:before{content:""}q:after{content:" (" attr(cite) ")"}.PageBreak{display:block;-moz-column-break-before:always;break-before:always}.NoPrint,.Pager,aside{display:none}.Columns__right{width:100%!important}.s-content a:after{content:" (" attr(href) ")";font-size:80%;word-wrap:break-word}h1 a[href]:after{font-size:50%}}.Columns__right__content,body{background-color:#f2f2f2}a.Link--external::after{content:''}.s-content h1,.s-content h2,.s-content h3,.s-content h4,.s-content h5,.s-content h6{margin-bottom:1.5rem}.s-content h1{font-size:1.75rem}.s-content h2{font-size:1.5rem}.s-content h3{font-size:1.25rem}.s-content h4{font-size:1.125rem}.Nav__item .Nav__item,.s-content h5,.s-content h6,.s-content table{font-size:1rem}.s-content table tbody,.s-content table thead{background-color:#fff}.s-content table tr:nth-child(2n) td{background-color:#fff}.s-content table td,.s-content table th{border:0}.s-content table th{background-color:#c4c9df}.Brand,h1,h2,h3,h4,h5,h6{font-weight:600;font-stretch:condensed}h1,h2,h3,h4,h5,h6{color:#793862;border-bottom:1px dotted #333;padding-bottom:5px}.Button,.Pager li>a{border-radius:0}.HomepageButtons .Button--hero{font-weight:400;font-size:1rem}.Page__header{border-bottom:0}.Pager li>a{border:2px solid #dbdbdb}.Pager li>a:focus,.Pager li>a:hover{background-color:#dbdbdb}.Pager--prev a::before{content:"\2190\00a0"}.Pager--next a::after{content:"\00a0\2192"}.Navbar{height:auto;box-shadow:none}.Navbar 
.Brand{float:none;line-height:inherit;height:auto}.Homepage{padding-top:10px!important}.Nav__item{font-size:1rem}.Nav .Nav .Nav__item a{padding-left:35px}.Nav__arrow:before{margin:0 0 0 -.25em;top:auto;bottom:calc(50% - .0625em);width:.375em;height:.375em;transform-origin:center}.Nav .Nav .Nav__item a .Nav__arrow:before,.Nav__arrow:before{border-right-color:#f2f2f2;border-top-color:#f2f2f2}.admonition{padding:.75rem;margin:1.5rem 0;border:1px solid #c2c2c2;background-color:#fff}.admonition .danger{background-color:#f4dfdf;border-color:#c4b4b4}.hljs,.s-content pre{background:#15284b;color:#e8d5d3}.hljs{display:block;overflow-x:auto;padding:.5em}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}.hljs-comment,.hljs-quote{color:#978e9c}.hljs-addition,.hljs-keyword,.hljs-selector-tag{color:#acb39a}.hljs-doctag,.hljs-literal,.hljs-meta .hljs-meta-string,.hljs-number,.hljs-regexp,.hljs-string{color:#93b7bb}.hljs-name,.hljs-section,.hljs-selector-class,.hljs-selector-id,.hljs-title{color:#82b7e5}.hljs-attr,.hljs-attribute,.hljs-class .hljs-title,.hljs-template-variable,.hljs-type,.hljs-variable{color:#c5b031}.hljs-bullet,.hljs-link,.hljs-meta,.hljs-meta .hljs-keyword,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-subst,.hljs-symbol{color:#ea8031}.hljs-built_in,.hljs-deletion{color:#e63c2f}.hljs-formula{background:#686986}@media (min-width:850px){.Columns__left{border:0}} \ No newline at end of file diff --git a/docs/theme/src/php.scss b/docs/theme/src/php.scss deleted file mode 100644 index 3041d69..0000000 --- a/docs/theme/src/php.scss +++ /dev/null @@ -1,324 +0,0 @@ -/* Daux imports; fonts are omitted */ -@import "../../../vendor/daux/daux.io/src/css/theme_daux/vendor/normalize.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_variables.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_mixins.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_structure.scss"; -@import 
"../../../vendor/daux/daux.io/src/css/theme_daux/_typography.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_components.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_homepage.scss"; -@import "../../../vendor/daux/daux.io/src/css/theme_daux/_print.scss" print; - -/* Overrides */ - -:root { - --font-family-text: sans-serif; - --font-family-monospace: "Operator Mono SSm", "Operator Mono", monospace; - --font-family-heading: sans-serif; - - --type-size-1: 1.75rem; - --type-size-2: 1.5rem; - --type-size-3: 1.25rem; - --type-size-4: 1.125rem; - --type-size-5: 1rem; - --type-size-6: 1rem; - - --purple: #4f5b93; - --tyrian: #793862; - --light-purple: #8892bf; - --lighter-purple: #c4c9df; - --danger: #f4dfdf; - - --page: #f2f2f2; - --text: #333; - - --red: #e63c2f; - --blue: #15284b; - --light-blue: #93b7bb; - --beige: #e8d5d3; - --green: #2c9a42; - - --dark-gray: color(var(--page) blend(var(--text) 75%)); - --gray: color(var(--page) blend(var(--text) 50%)); - --light-gray: color(var(--page) blend(var(--text) 25%)); - --lighter-gray: color(var(--page) blend(var(--text) 12.5%)); - --lightest-gray: color(#fff blend(var(--page) 75%)); - - --dark: var(--text); - --light: var(--light-purple); - - --sidebar-background: var(--text); - --sidebar-link-active-background: var(--tyrian); - --sidebar-link-color: var(--page); - --sidebar-link-secondary-color: var(--page); - --sidebar-collapsible--hamburger-color: var(--beige); - - --link-color: #369; - --brand-color: #fff; - --brand-background: var(--purple); - - --code-tag-background-color: transparent; - --code-tag-border-radius: 0; - --code-tag-box-shadow: none; - - --homepage-navbar-background: var(--red); - --hero-button-block-background: var(--beige); - --homepage-hero-background: #fff; - --content-floating-blocks-background: var(--blue); -} - -body { - line-height: 1.618; - font-size: 16px; - color: var(--text) !important; -} - -body, .Columns__right__content { - background-color: var(--page); 
-} - -a.Link--external::after { - content: ''; -} - -.Page__header h1 { - font-size: var(--type-size-6); - border-bottom: 0; - margin-bottom: 0; -} - -.s-content { - h1, h2, h3, h4, h5, h6 { - margin-bottom: 1.5rem; - } - - h1 { - font-size: var(--type-size-1); - } - - h2 { - font-size: var(--type-size-2); - } - - h3 { - font-size: var(--type-size-3); - } - - h4 { - font-size: var(--type-size-4); - } - - h5 { - font-size: var(--type-size-5); - } - - h6 { - font-size: var(--type-size-6); - } - - - code { - padding-top: 0; - padding-bottom: 0; - padding: 0; - border: 0; - margin: 0; - - &::before, &::after { - display: none; - } - - pre & { - display: inline; - } - } - - table { - border-collapse: separate; - border-spacing: 2px; - border: 2px solid var(--gray); - - thead, tbody { - background-color: #fff; - } - - tr { - border-top: 0; - - &:nth-child(2n) { - background-color: transparent; - - td { - background-color: #fff; - } - } - } - - th, td { - border: 0; - } - - th { - background-color: var(--lighter-purple); - } - } -} - -.s-content table, .Nav__item .Nav__item { - font-size: 1rem; -} - -.Brand, h1, h2, h3, h4, h5, h6 { - font-weight: 600; - font-stretch: condensed; -} - -h1, h2, h3, h4, h5, h6 { - color: var(--tyrian); - border-bottom: 1px dotted var(--text); - padding-bottom: 5px; -} - -.Button { - border-radius: 0; -} - -.HomepageButtons .Button--hero { - font-weight: normal; - font-size: var(--type-size-6); -} - -.Page__header { - border-bottom: 0; -} - -.Pager li > a { - border: 2px solid var(--lighter-gray); - border-radius: 0; - - &:hover, &:focus { - background-color: var(--lighter-gray); - } -} - -.Pager--prev a::before { - content: "\2190\00a0"; -} -.Pager--next a::after { - content: "\00a0\2192"; -} - -.Navbar { - height: auto; - box-shadow: none; - - .Brand { - float: none; - line-height: inherit; - height: auto; - } -} - -.Homepage { - padding-top: 10px !important; -} - -.Nav__item { - font-size: var(--type-size-6); -} - -.Nav .Nav .Nav__item a { 
- padding-left: 35px; -} - -.Nav__arrow:before { - margin: 0 0 0 -.25em; - top: auto; - bottom: calc(50% - 0.0625em); - width: 0.375em; - height: 0.375em; - transform-origin: center; -} - -.Nav__arrow:before, .Nav .Nav .Nav__item a .Nav__arrow:before { - border-right-color: var(--page); - border-top-color: var(--page); -} - -.admonition { - padding: 0.75rem; - margin: 1.5rem 0; - border: 1px solid var(--light-gray); - background-color: #fff; - - p:last-child { - margin-bottom: 0; - } - - .danger { - background-color: var(--danger); - border-color: color(var(--danger) blend(var(--text) 25%)); - } -} - -.hljs, .s-content pre { - background: var(--blue); - color: var(--beige); -} - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-strong { - font-weight: bold; -} - -.hljs-comment, .hljs-quote { - color: #978e9c; -} - -/* Green */ -.hljs-keyword, .hljs-selector-tag, .hljs-addition { - color: #acb39a; -} - -/* Cyan */ -.hljs-number, .hljs-string, .hljs-meta .hljs-meta-string, .hljs-literal, .hljs-doctag, .hljs-regexp { - color: var(--light-blue); -} - -/* Blue */ -.hljs-title, .hljs-section, .hljs-name, .hljs-selector-id, .hljs-selector-class { - color: #82b7e5; -} - -/* Yellow */ -.hljs-attribute, .hljs-attr, .hljs-variable, .hljs-template-variable, .hljs-class .hljs-title, .hljs-type { - color: #c5b031; -} - -/* Orange */ -.hljs-symbol, .hljs-bullet, .hljs-subst, .hljs-meta, .hljs-meta .hljs-keyword, .hljs-selector-attr, .hljs-selector-pseudo, .hljs-link { - color: #ea8031; -} - -/* Red */ -.hljs-built_in, .hljs-deletion { - color: var(--red); -} - -.hljs-formula { - background: #686986; -} - -@media (--viewport-large) { - .Columns__left { - border: 0; - } -} diff --git a/lib/DOM/AbstractDocument.php b/lib/AbstractDocument.php similarity index 92% rename from lib/DOM/AbstractDocument.php rename to lib/AbstractDocument.php index 1e162fe..e721f7e 100644 --- a/lib/DOM/AbstractDocument.php +++ 
b/lib/AbstractDocument.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // Exists so Document can extend methods from its traits. abstract class AbstractDocument extends \DOMDocument { diff --git a/lib/ActiveFormattingElementsList.php b/lib/ActiveFormattingElementsList.php deleted file mode 100644 index 0bd06e2..0000000 --- a/lib/ActiveFormattingElementsList.php +++ /dev/null @@ -1,184 +0,0 @@ -count; - assert($offset >= 0 && $offset <= $count, new Exception(Exception::STACK_INVALID_INDEX, $offset)); - assert($value instanceof ActiveFormattingElementsMarker || ( - is_array($value) - && count($value) === 2 - && isset($value['token']) - && isset($value['element']) - && $value['token'] instanceof StartTagToken - && $value['element'] instanceof \DOMElement - ), new Exception(Exception::STACK_INVALID_VALUE)); - if ($value instanceof ActiveFormattingElementsMarker) { - $this->_storage[$offset ?? $count] = $value; - } elseif ($count && ($offset ?? $count) === $count) { - # When the steps below require the UA to push onto the list of active formatting - # elements an element element, the UA must perform the following steps: - // First find the position of the last marker, if any - $lastMarker = -1; - foreach ($this as $pos => $item) { - if ($item instanceof ActiveFormattingElementsMarker) { - $lastMarker = $pos; - break; - } - } - # If there are already three elements in the list of active formatting - # elements after the last marker, if any, or anywhere in the list if there are - # no markers, that have the same tag name, namespace, and attributes as element, - # then remove the earliest such element from the list of active formatting - # elements. 
- $pos = $count - 1; - $matches = 0; - if ($pos > $lastMarker) { - do { - $matches += (int) $this->matchElement($value['element'], $this->_storage[$pos]['element']); - // Stop once there are three matches or the marker is reached - } while ($matches < 3 && (--$pos) > $lastMarker); - } - if ($matches === 3) { - $this->offsetUnset($pos); - } - # Add element to the list of active formatting elements. - $this->_storage[] = $value; - } else { - $this->_storage[$offset ?? $count] = $value; - } - $this->count = count($this->_storage); - } - - protected function matchElement(\DOMElement $a, \DOMElement $b): bool { - // Compare elements as part of pushing an element onto the stack - # 1. If there are already three elements in the list of active formatting - # elements after the last marker, if any, or anywhere in the list if there are - # no markers, that have the same tag name, namespace, and attributes as element, - # then remove the earliest such element from the list of active formatting - # elements. - # For these purposes, the attributes must be compared as they were - # when the elements were created by the parser; two elements have the same - # attributes if all their parsed attributes can be paired such that the two - # attributes in each pair have identical names, namespaces, and values (the - # order of the attributes does not matter). 
- if ( - $a->nodeName !== $b->nodeName - || $a->namespaceURI !== $b->namespaceURI - || $a->attributes->length !== $b->attributes->length - ) { - return false; - } - foreach ($a->attributes as $attr) { - if (!$b->hasAttributeNS($attr->namespaceURI, $attr->nodeName) || $b->getAttributeNS($attr->namespaceURI, $attr->nodeName) !== $attr->value) { - return false; - } - } - return true; - } - - public function insert(StartTagToken $token, \DOMElement $element, ?int $at = null): void { - assert($at === null || ($at >= 0 && $at <= $this->count), new Exception(Exception::STACK_INVALID_INDEX, $at)); - if ($at === null) { - $this[] = [ - 'token' => $token, - 'element' => $element - ]; - } else { - array_splice($this->_storage, $at, 0, [[ - 'token' => $token, - 'element' => $element, - ]]); - $this->count = count($this->_storage); - } - } - - public function insertMarker(): void { - $this[] = new ActiveFormattingElementsMarker; - } - - public function clearToTheLastMarker(): void { - # When the steps below require the UA to clear the list of active formatting - # elements up to the last marker, the UA must perform the following steps: - # 1. Let entry be the last (most recently added) entry in the list of active - # formatting elements. - # 2. Remove entry from the list of active formatting elements. - # 3. If entry was a marker, then stop the algorithm at this point. The list has - # been cleared up to the last marker. - # 4. Go to step 1. 
- while ($this->_storage) { - $popped = array_pop($this->_storage); - if ($popped instanceof ActiveFormattingElementsMarker) { - break; - } - } - $this->count = count($this->_storage); - } - - public function findSame(\DOMElement $target): int { - foreach ($this as $k => $entry) { - if (!$entry instanceof ActiveFormattingElementsMarker && $entry['element']->isSameNode($target)) { - return $k; - } - } - return -1; - } - - public function findToMarker(string ...$name): int { - foreach ($this as $k => $entry) { - if ($entry instanceof ActiveFormattingElementsMarker) { - return -1; - } - if (in_array($entry['element']->nodeName, $name)) { - return $k; - } - } - return -1; - } - - public function removeSame(\DOMElement $target): void { - $pos = $this->findSame($target); - if ($pos > -1) { - unset($this[$pos]); - } - } - - /** @codeCoverageIgnore */ - public function __toString(): string { - $out = []; - foreach ($this as $entry) { - if ($entry instanceof ActiveFormattingElementsMarker) { - $out[] = "|"; - } else { - $node = $entry['element']; - $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE; - $prefix = Parser::NAMESPACE_MAP[$ns] ?? "?"; - $prefix .= $prefix ? 
" " : ""; - $out[] = $prefix.$node->nodeName; - } - } - return implode(" - ", $out); - } -} - -class ActiveFormattingElementsMarker { -} diff --git a/lib/CharacterReference.php b/lib/CharacterReference.php deleted file mode 100644 index d107a16..0000000 --- a/lib/CharacterReference.php +++ /dev/null @@ -1,19 +0,0 @@ -"\u{c6}",'AElig;'=>"\u{c6}",'AMP'=>"\u{26}",'AMP;'=>"\u{26}",'Aacute'=>"\u{c1}",'Aacute;'=>"\u{c1}",'Abreve;'=>"\u{102}",'Acirc'=>"\u{c2}",'Acirc;'=>"\u{c2}",'Acy;'=>"\u{410}",'Afr;'=>"\u{1d504}",'Agrave'=>"\u{c0}",'Agrave;'=>"\u{c0}",'Alpha;'=>"\u{391}",'Amacr;'=>"\u{100}",'And;'=>"\u{2a53}",'Aogon;'=>"\u{104}",'Aopf;'=>"\u{1d538}",'ApplyFunction;'=>"\u{2061}",'Aring'=>"\u{c5}",'Aring;'=>"\u{c5}",'Ascr;'=>"\u{1d49c}",'Assign;'=>"\u{2254}",'Atilde'=>"\u{c3}",'Atilde;'=>"\u{c3}",'Auml'=>"\u{c4}",'Auml;'=>"\u{c4}",'Backslash;'=>"\u{2216}",'Barv;'=>"\u{2ae7}",'Barwed;'=>"\u{2306}",'Bcy;'=>"\u{411}",'Because;'=>"\u{2235}",'Bernoullis;'=>"\u{212c}",'Beta;'=>"\u{392}",'Bfr;'=>"\u{1d505}",'Bopf;'=>"\u{1d539}",'Breve;'=>"\u{2d8}",'Bscr;'=>"\u{212c}",'Bumpeq;'=>"\u{224e}",'CHcy;'=>"\u{427}",'COPY'=>"\u{a9}",'COPY;'=>"\u{a9}",'Cacute;'=>"\u{106}",'Cap;'=>"\u{22d2}",'CapitalDifferentialD;'=>"\u{2145}",'Cayleys;'=>"\u{212d}",'Ccaron;'=>"\u{10c}",'Ccedil'=>"\u{c7}",'Ccedil;'=>"\u{c7}",'Ccirc;'=>"\u{108}",'Cconint;'=>"\u{2230}",'Cdot;'=>"\u{10a}",'Cedilla;'=>"\u{b8}",'CenterDot;'=>"\u{b7}",'Cfr;'=>"\u{212d}",'Chi;'=>"\u{3a7}",'CircleDot;'=>"\u{2299}",'CircleMinus;'=>"\u{2296}",'CirclePlus;'=>"\u{2295}",'CircleTimes;'=>"\u{2297}",'ClockwiseContourIntegral;'=>"\u{2232}",'CloseCurlyDoubleQuote;'=>"\u{201d}",'CloseCurlyQuote;'=>"\u{2019}",'Colon;'=>"\u{2237}",'Colone;'=>"\u{2a74}",'Congruent;'=>"\u{2261}",'Conint;'=>"\u{222f}",'ContourIntegral;'=>"\u{222e}",'Copf;'=>"\u{2102}",'Coproduct;'=>"\u{2210}",'CounterClockwiseContourIntegral;'=>"\u{2233}",'Cross;'=>"\u{2a2f}",'Cscr;'=>"\u{1d49e}",'Cup;'=>"\u{22d3}",'CupCap;'=>"\u{224d}",'DD;'=>"\u{2145}",'DDotrahd;'=>"\u{2911}",
'DJcy;'=>"\u{402}",'DScy;'=>"\u{405}",'DZcy;'=>"\u{40f}",'Dagger;'=>"\u{2021}",'Darr;'=>"\u{21a1}",'Dashv;'=>"\u{2ae4}",'Dcaron;'=>"\u{10e}",'Dcy;'=>"\u{414}",'Del;'=>"\u{2207}",'Delta;'=>"\u{394}",'Dfr;'=>"\u{1d507}",'DiacriticalAcute;'=>"\u{b4}",'DiacriticalDot;'=>"\u{2d9}",'DiacriticalDoubleAcute;'=>"\u{2dd}",'DiacriticalGrave;'=>"\u{60}",'DiacriticalTilde;'=>"\u{2dc}",'Diamond;'=>"\u{22c4}",'DifferentialD;'=>"\u{2146}",'Dopf;'=>"\u{1d53b}",'Dot;'=>"\u{a8}",'DotDot;'=>"\u{20dc}",'DotEqual;'=>"\u{2250}",'DoubleContourIntegral;'=>"\u{222f}",'DoubleDot;'=>"\u{a8}",'DoubleDownArrow;'=>"\u{21d3}",'DoubleLeftArrow;'=>"\u{21d0}",'DoubleLeftRightArrow;'=>"\u{21d4}",'DoubleLeftTee;'=>"\u{2ae4}",'DoubleLongLeftArrow;'=>"\u{27f8}",'DoubleLongLeftRightArrow;'=>"\u{27fa}",'DoubleLongRightArrow;'=>"\u{27f9}",'DoubleRightArrow;'=>"\u{21d2}",'DoubleRightTee;'=>"\u{22a8}",'DoubleUpArrow;'=>"\u{21d1}",'DoubleUpDownArrow;'=>"\u{21d5}",'DoubleVerticalBar;'=>"\u{2225}",'DownArrow;'=>"\u{2193}",'DownArrowBar;'=>"\u{2913}",'DownArrowUpArrow;'=>"\u{21f5}",'DownBreve;'=>"\u{311}",'DownLeftRightVector;'=>"\u{2950}",'DownLeftTeeVector;'=>"\u{295e}",'DownLeftVector;'=>"\u{21bd}",'DownLeftVectorBar;'=>"\u{2956}",'DownRightTeeVector;'=>"\u{295f}",'DownRightVector;'=>"\u{21c1}",'DownRightVectorBar;'=>"\u{2957}",'DownTee;'=>"\u{22a4}",'DownTeeArrow;'=>"\u{21a7}",'Downarrow;'=>"\u{21d3}",'Dscr;'=>"\u{1d49f}",'Dstrok;'=>"\u{110}",'ENG;'=>"\u{14a}",'ETH'=>"\u{d0}",'ETH;'=>"\u{d0}",'Eacute'=>"\u{c9}",'Eacute;'=>"\u{c9}",'Ecaron;'=>"\u{11a}",'Ecirc'=>"\u{ca}",'Ecirc;'=>"\u{ca}",'Ecy;'=>"\u{42d}",'Edot;'=>"\u{116}",'Efr;'=>"\u{1d508}",'Egrave'=>"\u{c8}",'Egrave;'=>"\u{c8}",'Element;'=>"\u{2208}",'Emacr;'=>"\u{112}",'EmptySmallSquare;'=>"\u{25fb}",'EmptyVerySmallSquare;'=>"\u{25ab}",'Eogon;'=>"\u{118}",'Eopf;'=>"\u{1d53c}",'Epsilon;'=>"\u{395}",'Equal;'=>"\u{2a75}",'EqualTilde;'=>"\u{2242}",'Equilibrium;'=>"\u{21cc}",'Escr;'=>"\u{2130}",'Esim;'=>"\u{2a73}",'Eta;'=>"\u{397}",'Euml'=>"\u{cb}",'Euml;'=>"
\u{cb}",'Exists;'=>"\u{2203}",'ExponentialE;'=>"\u{2147}",'Fcy;'=>"\u{424}",'Ffr;'=>"\u{1d509}",'FilledSmallSquare;'=>"\u{25fc}",'FilledVerySmallSquare;'=>"\u{25aa}",'Fopf;'=>"\u{1d53d}",'ForAll;'=>"\u{2200}",'Fouriertrf;'=>"\u{2131}",'Fscr;'=>"\u{2131}",'GJcy;'=>"\u{403}",'GT'=>"\u{3e}",'GT;'=>"\u{3e}",'Gamma;'=>"\u{393}",'Gammad;'=>"\u{3dc}",'Gbreve;'=>"\u{11e}",'Gcedil;'=>"\u{122}",'Gcirc;'=>"\u{11c}",'Gcy;'=>"\u{413}",'Gdot;'=>"\u{120}",'Gfr;'=>"\u{1d50a}",'Gg;'=>"\u{22d9}",'Gopf;'=>"\u{1d53e}",'GreaterEqual;'=>"\u{2265}",'GreaterEqualLess;'=>"\u{22db}",'GreaterFullEqual;'=>"\u{2267}",'GreaterGreater;'=>"\u{2aa2}",'GreaterLess;'=>"\u{2277}",'GreaterSlantEqual;'=>"\u{2a7e}",'GreaterTilde;'=>"\u{2273}",'Gscr;'=>"\u{1d4a2}",'Gt;'=>"\u{226b}",'HARDcy;'=>"\u{42a}",'Hacek;'=>"\u{2c7}",'Hat;'=>"\u{5e}",'Hcirc;'=>"\u{124}",'Hfr;'=>"\u{210c}",'HilbertSpace;'=>"\u{210b}",'Hopf;'=>"\u{210d}",'HorizontalLine;'=>"\u{2500}",'Hscr;'=>"\u{210b}",'Hstrok;'=>"\u{126}",'HumpDownHump;'=>"\u{224e}",'HumpEqual;'=>"\u{224f}",'IEcy;'=>"\u{415}",'IJlig;'=>"\u{132}",'IOcy;'=>"\u{401}",'Iacute'=>"\u{cd}",'Iacute;'=>"\u{cd}",'Icirc'=>"\u{ce}",'Icirc;'=>"\u{ce}",'Icy;'=>"\u{418}",'Idot;'=>"\u{130}",'Ifr;'=>"\u{2111}",'Igrave'=>"\u{cc}",'Igrave;'=>"\u{cc}",'Im;'=>"\u{2111}",'Imacr;'=>"\u{12a}",'ImaginaryI;'=>"\u{2148}",'Implies;'=>"\u{21d2}",'Int;'=>"\u{222c}",'Integral;'=>"\u{222b}",'Intersection;'=>"\u{22c2}",'InvisibleComma;'=>"\u{2063}",'InvisibleTimes;'=>"\u{2062}",'Iogon;'=>"\u{12e}",'Iopf;'=>"\u{1d540}",'Iota;'=>"\u{399}",'Iscr;'=>"\u{2110}",'Itilde;'=>"\u{128}",'Iukcy;'=>"\u{406}",'Iuml'=>"\u{cf}",'Iuml;'=>"\u{cf}",'Jcirc;'=>"\u{134}",'Jcy;'=>"\u{419}",'Jfr;'=>"\u{1d50d}",'Jopf;'=>"\u{1d541}",'Jscr;'=>"\u{1d4a5}",'Jsercy;'=>"\u{408}",'Jukcy;'=>"\u{404}",'KHcy;'=>"\u{425}",'KJcy;'=>"\u{40c}",'Kappa;'=>"\u{39a}",'Kcedil;'=>"\u{136}",'Kcy;'=>"\u{41a}",'Kfr;'=>"\u{1d50e}",'Kopf;'=>"\u{1d542}",'Kscr;'=>"\u{1d4a6}",'LJcy;'=>"\u{409}",'LT'=>"\u{3c}",'LT;'=>"\u{3c}",'Lacute;'=>"\u{139}",'Lam
bda;'=>"\u{39b}",'Lang;'=>"\u{27ea}",'Laplacetrf;'=>"\u{2112}",'Larr;'=>"\u{219e}",'Lcaron;'=>"\u{13d}",'Lcedil;'=>"\u{13b}",'Lcy;'=>"\u{41b}",'LeftAngleBracket;'=>"\u{27e8}",'LeftArrow;'=>"\u{2190}",'LeftArrowBar;'=>"\u{21e4}",'LeftArrowRightArrow;'=>"\u{21c6}",'LeftCeiling;'=>"\u{2308}",'LeftDoubleBracket;'=>"\u{27e6}",'LeftDownTeeVector;'=>"\u{2961}",'LeftDownVector;'=>"\u{21c3}",'LeftDownVectorBar;'=>"\u{2959}",'LeftFloor;'=>"\u{230a}",'LeftRightArrow;'=>"\u{2194}",'LeftRightVector;'=>"\u{294e}",'LeftTee;'=>"\u{22a3}",'LeftTeeArrow;'=>"\u{21a4}",'LeftTeeVector;'=>"\u{295a}",'LeftTriangle;'=>"\u{22b2}",'LeftTriangleBar;'=>"\u{29cf}",'LeftTriangleEqual;'=>"\u{22b4}",'LeftUpDownVector;'=>"\u{2951}",'LeftUpTeeVector;'=>"\u{2960}",'LeftUpVector;'=>"\u{21bf}",'LeftUpVectorBar;'=>"\u{2958}",'LeftVector;'=>"\u{21bc}",'LeftVectorBar;'=>"\u{2952}",'Leftarrow;'=>"\u{21d0}",'Leftrightarrow;'=>"\u{21d4}",'LessEqualGreater;'=>"\u{22da}",'LessFullEqual;'=>"\u{2266}",'LessGreater;'=>"\u{2276}",'LessLess;'=>"\u{2aa1}",'LessSlantEqual;'=>"\u{2a7d}",'LessTilde;'=>"\u{2272}",'Lfr;'=>"\u{1d50f}",'Ll;'=>"\u{22d8}",'Lleftarrow;'=>"\u{21da}",'Lmidot;'=>"\u{13f}",'LongLeftArrow;'=>"\u{27f5}",'LongLeftRightArrow;'=>"\u{27f7}",'LongRightArrow;'=>"\u{27f6}",'Longleftarrow;'=>"\u{27f8}",'Longleftrightarrow;'=>"\u{27fa}",'Longrightarrow;'=>"\u{27f9}",'Lopf;'=>"\u{1d543}",'LowerLeftArrow;'=>"\u{2199}",'LowerRightArrow;'=>"\u{2198}",'Lscr;'=>"\u{2112}",'Lsh;'=>"\u{21b0}",'Lstrok;'=>"\u{141}",'Lt;'=>"\u{226a}",'Map;'=>"\u{2905}",'Mcy;'=>"\u{41c}",'MediumSpace;'=>"\u{205f}",'Mellintrf;'=>"\u{2133}",'Mfr;'=>"\u{1d510}",'MinusPlus;'=>"\u{2213}",'Mopf;'=>"\u{1d544}",'Mscr;'=>"\u{2133}",'Mu;'=>"\u{39c}",'NJcy;'=>"\u{40a}",'Nacute;'=>"\u{143}",'Ncaron;'=>"\u{147}",'Ncedil;'=>"\u{145}",'Ncy;'=>"\u{41d}",'NegativeMediumSpace;'=>"\u{200b}",'NegativeThickSpace;'=>"\u{200b}",'NegativeThinSpace;'=>"\u{200b}",'NegativeVeryThinSpace;'=>"\u{200b}",'NestedGreaterGreater;'=>"\u{226b}",'NestedLessLess;'=>"\u{226
a}",'NewLine;'=>"\u{a}",'Nfr;'=>"\u{1d511}",'NoBreak;'=>"\u{2060}",'NonBreakingSpace;'=>"\u{a0}",'Nopf;'=>"\u{2115}",'Not;'=>"\u{2aec}",'NotCongruent;'=>"\u{2262}",'NotCupCap;'=>"\u{226d}",'NotDoubleVerticalBar;'=>"\u{2226}",'NotElement;'=>"\u{2209}",'NotEqual;'=>"\u{2260}",'NotEqualTilde;'=>"\u{2242}\u{338}",'NotExists;'=>"\u{2204}",'NotGreater;'=>"\u{226f}",'NotGreaterEqual;'=>"\u{2271}",'NotGreaterFullEqual;'=>"\u{2267}\u{338}",'NotGreaterGreater;'=>"\u{226b}\u{338}",'NotGreaterLess;'=>"\u{2279}",'NotGreaterSlantEqual;'=>"\u{2a7e}\u{338}",'NotGreaterTilde;'=>"\u{2275}",'NotHumpDownHump;'=>"\u{224e}\u{338}",'NotHumpEqual;'=>"\u{224f}\u{338}",'NotLeftTriangle;'=>"\u{22ea}",'NotLeftTriangleBar;'=>"\u{29cf}\u{338}",'NotLeftTriangleEqual;'=>"\u{22ec}",'NotLess;'=>"\u{226e}",'NotLessEqual;'=>"\u{2270}",'NotLessGreater;'=>"\u{2278}",'NotLessLess;'=>"\u{226a}\u{338}",'NotLessSlantEqual;'=>"\u{2a7d}\u{338}",'NotLessTilde;'=>"\u{2274}",'NotNestedGreaterGreater;'=>"\u{2aa2}\u{338}",'NotNestedLessLess;'=>"\u{2aa1}\u{338}",'NotPrecedes;'=>"\u{2280}",'NotPrecedesEqual;'=>"\u{2aaf}\u{338}",'NotPrecedesSlantEqual;'=>"\u{22e0}",'NotReverseElement;'=>"\u{220c}",'NotRightTriangle;'=>"\u{22eb}",'NotRightTriangleBar;'=>"\u{29d0}\u{338}",'NotRightTriangleEqual;'=>"\u{22ed}",'NotSquareSubset;'=>"\u{228f}\u{338}",'NotSquareSubsetEqual;'=>"\u{22e2}",'NotSquareSuperset;'=>"\u{2290}\u{338}",'NotSquareSupersetEqual;'=>"\u{22e3}",'NotSubset;'=>"\u{2282}\u{20d2}",'NotSubsetEqual;'=>"\u{2288}",'NotSucceeds;'=>"\u{2281}",'NotSucceedsEqual;'=>"\u{2ab0}\u{338}",'NotSucceedsSlantEqual;'=>"\u{22e1}",'NotSucceedsTilde;'=>"\u{227f}\u{338}",'NotSuperset;'=>"\u{2283}\u{20d2}",'NotSupersetEqual;'=>"\u{2289}",'NotTilde;'=>"\u{2241}",'NotTildeEqual;'=>"\u{2244}",'NotTildeFullEqual;'=>"\u{2247}",'NotTildeTilde;'=>"\u{2249}",'NotVerticalBar;'=>"\u{2224}",'Nscr;'=>"\u{1d4a9}",'Ntilde'=>"\u{d1}",'Ntilde;'=>"\u{d1}",'Nu;'=>"\u{39d}",'OElig;'=>"\u{152}",'Oacute'=>"\u{d3}",'Oacute;'=>"\u{d3}",'Ocirc'=>"\u{d4}",'
Ocirc;'=>"\u{d4}",'Ocy;'=>"\u{41e}",'Odblac;'=>"\u{150}",'Ofr;'=>"\u{1d512}",'Ograve'=>"\u{d2}",'Ograve;'=>"\u{d2}",'Omacr;'=>"\u{14c}",'Omega;'=>"\u{3a9}",'Omicron;'=>"\u{39f}",'Oopf;'=>"\u{1d546}",'OpenCurlyDoubleQuote;'=>"\u{201c}",'OpenCurlyQuote;'=>"\u{2018}",'Or;'=>"\u{2a54}",'Oscr;'=>"\u{1d4aa}",'Oslash'=>"\u{d8}",'Oslash;'=>"\u{d8}",'Otilde'=>"\u{d5}",'Otilde;'=>"\u{d5}",'Otimes;'=>"\u{2a37}",'Ouml'=>"\u{d6}",'Ouml;'=>"\u{d6}",'OverBar;'=>"\u{203e}",'OverBrace;'=>"\u{23de}",'OverBracket;'=>"\u{23b4}",'OverParenthesis;'=>"\u{23dc}",'PartialD;'=>"\u{2202}",'Pcy;'=>"\u{41f}",'Pfr;'=>"\u{1d513}",'Phi;'=>"\u{3a6}",'Pi;'=>"\u{3a0}",'PlusMinus;'=>"\u{b1}",'Poincareplane;'=>"\u{210c}",'Popf;'=>"\u{2119}",'Pr;'=>"\u{2abb}",'Precedes;'=>"\u{227a}",'PrecedesEqual;'=>"\u{2aaf}",'PrecedesSlantEqual;'=>"\u{227c}",'PrecedesTilde;'=>"\u{227e}",'Prime;'=>"\u{2033}",'Product;'=>"\u{220f}",'Proportion;'=>"\u{2237}",'Proportional;'=>"\u{221d}",'Pscr;'=>"\u{1d4ab}",'Psi;'=>"\u{3a8}",'QUOT'=>"\u{22}",'QUOT;'=>"\u{22}",'Qfr;'=>"\u{1d514}",'Qopf;'=>"\u{211a}",'Qscr;'=>"\u{1d4ac}",'RBarr;'=>"\u{2910}",'REG'=>"\u{ae}",'REG;'=>"\u{ae}",'Racute;'=>"\u{154}",'Rang;'=>"\u{27eb}",'Rarr;'=>"\u{21a0}",'Rarrtl;'=>"\u{2916}",'Rcaron;'=>"\u{158}",'Rcedil;'=>"\u{156}",'Rcy;'=>"\u{420}",'Re;'=>"\u{211c}",'ReverseElement;'=>"\u{220b}",'ReverseEquilibrium;'=>"\u{21cb}",'ReverseUpEquilibrium;'=>"\u{296f}",'Rfr;'=>"\u{211c}",'Rho;'=>"\u{3a1}",'RightAngleBracket;'=>"\u{27e9}",'RightArrow;'=>"\u{2192}",'RightArrowBar;'=>"\u{21e5}",'RightArrowLeftArrow;'=>"\u{21c4}",'RightCeiling;'=>"\u{2309}",'RightDoubleBracket;'=>"\u{27e7}",'RightDownTeeVector;'=>"\u{295d}",'RightDownVector;'=>"\u{21c2}",'RightDownVectorBar;'=>"\u{2955}",'RightFloor;'=>"\u{230b}",'RightTee;'=>"\u{22a2}",'RightTeeArrow;'=>"\u{21a6}",'RightTeeVector;'=>"\u{295b}",'RightTriangle;'=>"\u{22b3}",'RightTriangleBar;'=>"\u{29d0}",'RightTriangleEqual;'=>"\u{22b5}",'RightUpDownVector;'=>"\u{294f}",'RightUpTeeVector;'=>"\u{295c}",'RightUpVector
;'=>"\u{21be}",'RightUpVectorBar;'=>"\u{2954}",'RightVector;'=>"\u{21c0}",'RightVectorBar;'=>"\u{2953}",'Rightarrow;'=>"\u{21d2}",'Ropf;'=>"\u{211d}",'RoundImplies;'=>"\u{2970}",'Rrightarrow;'=>"\u{21db}",'Rscr;'=>"\u{211b}",'Rsh;'=>"\u{21b1}",'RuleDelayed;'=>"\u{29f4}",'SHCHcy;'=>"\u{429}",'SHcy;'=>"\u{428}",'SOFTcy;'=>"\u{42c}",'Sacute;'=>"\u{15a}",'Sc;'=>"\u{2abc}",'Scaron;'=>"\u{160}",'Scedil;'=>"\u{15e}",'Scirc;'=>"\u{15c}",'Scy;'=>"\u{421}",'Sfr;'=>"\u{1d516}",'ShortDownArrow;'=>"\u{2193}",'ShortLeftArrow;'=>"\u{2190}",'ShortRightArrow;'=>"\u{2192}",'ShortUpArrow;'=>"\u{2191}",'Sigma;'=>"\u{3a3}",'SmallCircle;'=>"\u{2218}",'Sopf;'=>"\u{1d54a}",'Sqrt;'=>"\u{221a}",'Square;'=>"\u{25a1}",'SquareIntersection;'=>"\u{2293}",'SquareSubset;'=>"\u{228f}",'SquareSubsetEqual;'=>"\u{2291}",'SquareSuperset;'=>"\u{2290}",'SquareSupersetEqual;'=>"\u{2292}",'SquareUnion;'=>"\u{2294}",'Sscr;'=>"\u{1d4ae}",'Star;'=>"\u{22c6}",'Sub;'=>"\u{22d0}",'Subset;'=>"\u{22d0}",'SubsetEqual;'=>"\u{2286}",'Succeeds;'=>"\u{227b}",'SucceedsEqual;'=>"\u{2ab0}",'SucceedsSlantEqual;'=>"\u{227d}",'SucceedsTilde;'=>"\u{227f}",'SuchThat;'=>"\u{220b}",'Sum;'=>"\u{2211}",'Sup;'=>"\u{22d1}",'Superset;'=>"\u{2283}",'SupersetEqual;'=>"\u{2287}",'Supset;'=>"\u{22d1}",'THORN'=>"\u{de}",'THORN;'=>"\u{de}",'TRADE;'=>"\u{2122}",'TSHcy;'=>"\u{40b}",'TScy;'=>"\u{426}",'Tab;'=>"\u{9}",'Tau;'=>"\u{3a4}",'Tcaron;'=>"\u{164}",'Tcedil;'=>"\u{162}",'Tcy;'=>"\u{422}",'Tfr;'=>"\u{1d517}",'Therefore;'=>"\u{2234}",'Theta;'=>"\u{398}",'ThickSpace;'=>"\u{205f}\u{200a}",'ThinSpace;'=>"\u{2009}",'Tilde;'=>"\u{223c}",'TildeEqual;'=>"\u{2243}",'TildeFullEqual;'=>"\u{2245}",'TildeTilde;'=>"\u{2248}",'Topf;'=>"\u{1d54b}",'TripleDot;'=>"\u{20db}",'Tscr;'=>"\u{1d4af}",'Tstrok;'=>"\u{166}",'Uacute'=>"\u{da}",'Uacute;'=>"\u{da}",'Uarr;'=>"\u{219f}",'Uarrocir;'=>"\u{2949}",'Ubrcy;'=>"\u{40e}",'Ubreve;'=>"\u{16c}",'Ucirc'=>"\u{db}",'Ucirc;'=>"\u{db}",'Ucy;'=>"\u{423}",'Udblac;'=>"\u{170}",'Ufr;'=>"\u{1d518}",'Ugrave'=>"\u{d9}",'Ugrav
e;'=>"\u{d9}",'Umacr;'=>"\u{16a}",'UnderBar;'=>"\u{5f}",'UnderBrace;'=>"\u{23df}",'UnderBracket;'=>"\u{23b5}",'UnderParenthesis;'=>"\u{23dd}",'Union;'=>"\u{22c3}",'UnionPlus;'=>"\u{228e}",'Uogon;'=>"\u{172}",'Uopf;'=>"\u{1d54c}",'UpArrow;'=>"\u{2191}",'UpArrowBar;'=>"\u{2912}",'UpArrowDownArrow;'=>"\u{21c5}",'UpDownArrow;'=>"\u{2195}",'UpEquilibrium;'=>"\u{296e}",'UpTee;'=>"\u{22a5}",'UpTeeArrow;'=>"\u{21a5}",'Uparrow;'=>"\u{21d1}",'Updownarrow;'=>"\u{21d5}",'UpperLeftArrow;'=>"\u{2196}",'UpperRightArrow;'=>"\u{2197}",'Upsi;'=>"\u{3d2}",'Upsilon;'=>"\u{3a5}",'Uring;'=>"\u{16e}",'Uscr;'=>"\u{1d4b0}",'Utilde;'=>"\u{168}",'Uuml'=>"\u{dc}",'Uuml;'=>"\u{dc}",'VDash;'=>"\u{22ab}",'Vbar;'=>"\u{2aeb}",'Vcy;'=>"\u{412}",'Vdash;'=>"\u{22a9}",'Vdashl;'=>"\u{2ae6}",'Vee;'=>"\u{22c1}",'Verbar;'=>"\u{2016}",'Vert;'=>"\u{2016}",'VerticalBar;'=>"\u{2223}",'VerticalLine;'=>"\u{7c}",'VerticalSeparator;'=>"\u{2758}",'VerticalTilde;'=>"\u{2240}",'VeryThinSpace;'=>"\u{200a}",'Vfr;'=>"\u{1d519}",'Vopf;'=>"\u{1d54d}",'Vscr;'=>"\u{1d4b1}",'Vvdash;'=>"\u{22aa}",'Wcirc;'=>"\u{174}",'Wedge;'=>"\u{22c0}",'Wfr;'=>"\u{1d51a}",'Wopf;'=>"\u{1d54e}",'Wscr;'=>"\u{1d4b2}",'Xfr;'=>"\u{1d51b}",'Xi;'=>"\u{39e}",'Xopf;'=>"\u{1d54f}",'Xscr;'=>"\u{1d4b3}",'YAcy;'=>"\u{42f}",'YIcy;'=>"\u{407}",'YUcy;'=>"\u{42e}",'Yacute'=>"\u{dd}",'Yacute;'=>"\u{dd}",'Ycirc;'=>"\u{176}",'Ycy;'=>"\u{42b}",'Yfr;'=>"\u{1d51c}",'Yopf;'=>"\u{1d550}",'Yscr;'=>"\u{1d4b4}",'Yuml;'=>"\u{178}",'ZHcy;'=>"\u{416}",'Zacute;'=>"\u{179}",'Zcaron;'=>"\u{17d}",'Zcy;'=>"\u{417}",'Zdot;'=>"\u{17b}",'ZeroWidthSpace;'=>"\u{200b}",'Zeta;'=>"\u{396}",'Zfr;'=>"\u{2128}",'Zopf;'=>"\u{2124}",'Zscr;'=>"\u{1d4b5}",'aacute'=>"\u{e1}",'aacute;'=>"\u{e1}",'abreve;'=>"\u{103}",'ac;'=>"\u{223e}",'acE;'=>"\u{223e}\u{333}",'acd;'=>"\u{223f}",'acirc'=>"\u{e2}",'acirc;'=>"\u{e2}",'acute'=>"\u{b4}",'acute;'=>"\u{b4}",'acy;'=>"\u{430}",'aelig'=>"\u{e6}",'aelig;'=>"\u{e6}",'af;'=>"\u{2061}",'afr;'=>"\u{1d51e}",'agrave'=>"\u{e0}",'agrave;'=>"\u{e0}",'alefsym;'=>"\
u{2135}",'aleph;'=>"\u{2135}",'alpha;'=>"\u{3b1}",'amacr;'=>"\u{101}",'amalg;'=>"\u{2a3f}",'amp'=>"\u{26}",'amp;'=>"\u{26}",'and;'=>"\u{2227}",'andand;'=>"\u{2a55}",'andd;'=>"\u{2a5c}",'andslope;'=>"\u{2a58}",'andv;'=>"\u{2a5a}",'ang;'=>"\u{2220}",'ange;'=>"\u{29a4}",'angle;'=>"\u{2220}",'angmsd;'=>"\u{2221}",'angmsdaa;'=>"\u{29a8}",'angmsdab;'=>"\u{29a9}",'angmsdac;'=>"\u{29aa}",'angmsdad;'=>"\u{29ab}",'angmsdae;'=>"\u{29ac}",'angmsdaf;'=>"\u{29ad}",'angmsdag;'=>"\u{29ae}",'angmsdah;'=>"\u{29af}",'angrt;'=>"\u{221f}",'angrtvb;'=>"\u{22be}",'angrtvbd;'=>"\u{299d}",'angsph;'=>"\u{2222}",'angst;'=>"\u{c5}",'angzarr;'=>"\u{237c}",'aogon;'=>"\u{105}",'aopf;'=>"\u{1d552}",'ap;'=>"\u{2248}",'apE;'=>"\u{2a70}",'apacir;'=>"\u{2a6f}",'ape;'=>"\u{224a}",'apid;'=>"\u{224b}",'apos;'=>"\u{27}",'approx;'=>"\u{2248}",'approxeq;'=>"\u{224a}",'aring'=>"\u{e5}",'aring;'=>"\u{e5}",'ascr;'=>"\u{1d4b6}",'ast;'=>"\u{2a}",'asymp;'=>"\u{2248}",'asympeq;'=>"\u{224d}",'atilde'=>"\u{e3}",'atilde;'=>"\u{e3}",'auml'=>"\u{e4}",'auml;'=>"\u{e4}",'awconint;'=>"\u{2233}",'awint;'=>"\u{2a11}",'bNot;'=>"\u{2aed}",'backcong;'=>"\u{224c}",'backepsilon;'=>"\u{3f6}",'backprime;'=>"\u{2035}",'backsim;'=>"\u{223d}",'backsimeq;'=>"\u{22cd}",'barvee;'=>"\u{22bd}",'barwed;'=>"\u{2305}",'barwedge;'=>"\u{2305}",'bbrk;'=>"\u{23b5}",'bbrktbrk;'=>"\u{23b6}",'bcong;'=>"\u{224c}",'bcy;'=>"\u{431}",'bdquo;'=>"\u{201e}",'becaus;'=>"\u{2235}",'because;'=>"\u{2235}",'bemptyv;'=>"\u{29b0}",'bepsi;'=>"\u{3f6}",'bernou;'=>"\u{212c}",'beta;'=>"\u{3b2}",'beth;'=>"\u{2136}",'between;'=>"\u{226c}",'bfr;'=>"\u{1d51f}",'bigcap;'=>"\u{22c2}",'bigcirc;'=>"\u{25ef}",'bigcup;'=>"\u{22c3}",'bigodot;'=>"\u{2a00}",'bigoplus;'=>"\u{2a01}",'bigotimes;'=>"\u{2a02}",'bigsqcup;'=>"\u{2a06}",'bigstar;'=>"\u{2605}",'bigtriangledown;'=>"\u{25bd}",'bigtriangleup;'=>"\u{25b3}",'biguplus;'=>"\u{2a04}",'bigvee;'=>"\u{22c1}",'bigwedge;'=>"\u{22c0}",'bkarow;'=>"\u{290d}",'blacklozenge;'=>"\u{29eb}",'blacksquare;'=>"\u{25aa}",'blacktriangle;'=>"\u{25
b4}",'blacktriangledown;'=>"\u{25be}",'blacktriangleleft;'=>"\u{25c2}",'blacktriangleright;'=>"\u{25b8}",'blank;'=>"\u{2423}",'blk12;'=>"\u{2592}",'blk14;'=>"\u{2591}",'blk34;'=>"\u{2593}",'block;'=>"\u{2588}",'bne;'=>"\u{3d}\u{20e5}",'bnequiv;'=>"\u{2261}\u{20e5}",'bnot;'=>"\u{2310}",'bopf;'=>"\u{1d553}",'bot;'=>"\u{22a5}",'bottom;'=>"\u{22a5}",'bowtie;'=>"\u{22c8}",'boxDL;'=>"\u{2557}",'boxDR;'=>"\u{2554}",'boxDl;'=>"\u{2556}",'boxDr;'=>"\u{2553}",'boxH;'=>"\u{2550}",'boxHD;'=>"\u{2566}",'boxHU;'=>"\u{2569}",'boxHd;'=>"\u{2564}",'boxHu;'=>"\u{2567}",'boxUL;'=>"\u{255d}",'boxUR;'=>"\u{255a}",'boxUl;'=>"\u{255c}",'boxUr;'=>"\u{2559}",'boxV;'=>"\u{2551}",'boxVH;'=>"\u{256c}",'boxVL;'=>"\u{2563}",'boxVR;'=>"\u{2560}",'boxVh;'=>"\u{256b}",'boxVl;'=>"\u{2562}",'boxVr;'=>"\u{255f}",'boxbox;'=>"\u{29c9}",'boxdL;'=>"\u{2555}",'boxdR;'=>"\u{2552}",'boxdl;'=>"\u{2510}",'boxdr;'=>"\u{250c}",'boxh;'=>"\u{2500}",'boxhD;'=>"\u{2565}",'boxhU;'=>"\u{2568}",'boxhd;'=>"\u{252c}",'boxhu;'=>"\u{2534}",'boxminus;'=>"\u{229f}",'boxplus;'=>"\u{229e}",'boxtimes;'=>"\u{22a0}",'boxuL;'=>"\u{255b}",'boxuR;'=>"\u{2558}",'boxul;'=>"\u{2518}",'boxur;'=>"\u{2514}",'boxv;'=>"\u{2502}",'boxvH;'=>"\u{256a}",'boxvL;'=>"\u{2561}",'boxvR;'=>"\u{255e}",'boxvh;'=>"\u{253c}",'boxvl;'=>"\u{2524}",'boxvr;'=>"\u{251c}",'bprime;'=>"\u{2035}",'breve;'=>"\u{2d8}",'brvbar'=>"\u{a6}",'brvbar;'=>"\u{a6}",'bscr;'=>"\u{1d4b7}",'bsemi;'=>"\u{204f}",'bsim;'=>"\u{223d}",'bsime;'=>"\u{22cd}",'bsol;'=>"\u{5c}",'bsolb;'=>"\u{29c5}",'bsolhsub;'=>"\u{27c8}",'bull;'=>"\u{2022}",'bullet;'=>"\u{2022}",'bump;'=>"\u{224e}",'bumpE;'=>"\u{2aae}",'bumpe;'=>"\u{224f}",'bumpeq;'=>"\u{224f}",'cacute;'=>"\u{107}",'cap;'=>"\u{2229}",'capand;'=>"\u{2a44}",'capbrcup;'=>"\u{2a49}",'capcap;'=>"\u{2a4b}",'capcup;'=>"\u{2a47}",'capdot;'=>"\u{2a40}",'caps;'=>"\u{2229}\u{fe00}",'caret;'=>"\u{2041}",'caron;'=>"\u{2c7}",'ccaps;'=>"\u{2a4d}",'ccaron;'=>"\u{10d}",'ccedil'=>"\u{e7}",'ccedil;'=>"\u{e7}",'ccirc;'=>"\u{109}",'ccups;'=>"\u{2a4c}",'ccup
ssm;'=>"\u{2a50}",'cdot;'=>"\u{10b}",'cedil'=>"\u{b8}",'cedil;'=>"\u{b8}",'cemptyv;'=>"\u{29b2}",'cent'=>"\u{a2}",'cent;'=>"\u{a2}",'centerdot;'=>"\u{b7}",'cfr;'=>"\u{1d520}",'chcy;'=>"\u{447}",'check;'=>"\u{2713}",'checkmark;'=>"\u{2713}",'chi;'=>"\u{3c7}",'cir;'=>"\u{25cb}",'cirE;'=>"\u{29c3}",'circ;'=>"\u{2c6}",'circeq;'=>"\u{2257}",'circlearrowleft;'=>"\u{21ba}",'circlearrowright;'=>"\u{21bb}",'circledR;'=>"\u{ae}",'circledS;'=>"\u{24c8}",'circledast;'=>"\u{229b}",'circledcirc;'=>"\u{229a}",'circleddash;'=>"\u{229d}",'cire;'=>"\u{2257}",'cirfnint;'=>"\u{2a10}",'cirmid;'=>"\u{2aef}",'cirscir;'=>"\u{29c2}",'clubs;'=>"\u{2663}",'clubsuit;'=>"\u{2663}",'colon;'=>"\u{3a}",'colone;'=>"\u{2254}",'coloneq;'=>"\u{2254}",'comma;'=>"\u{2c}",'commat;'=>"\u{40}",'comp;'=>"\u{2201}",'compfn;'=>"\u{2218}",'complement;'=>"\u{2201}",'complexes;'=>"\u{2102}",'cong;'=>"\u{2245}",'congdot;'=>"\u{2a6d}",'conint;'=>"\u{222e}",'copf;'=>"\u{1d554}",'coprod;'=>"\u{2210}",'copy'=>"\u{a9}",'copy;'=>"\u{a9}",'copysr;'=>"\u{2117}",'crarr;'=>"\u{21b5}",'cross;'=>"\u{2717}",'cscr;'=>"\u{1d4b8}",'csub;'=>"\u{2acf}",'csube;'=>"\u{2ad1}",'csup;'=>"\u{2ad0}",'csupe;'=>"\u{2ad2}",'ctdot;'=>"\u{22ef}",'cudarrl;'=>"\u{2938}",'cudarrr;'=>"\u{2935}",'cuepr;'=>"\u{22de}",'cuesc;'=>"\u{22df}",'cularr;'=>"\u{21b6}",'cularrp;'=>"\u{293d}",'cup;'=>"\u{222a}",'cupbrcap;'=>"\u{2a48}",'cupcap;'=>"\u{2a46}",'cupcup;'=>"\u{2a4a}",'cupdot;'=>"\u{228d}",'cupor;'=>"\u{2a45}",'cups;'=>"\u{222a}\u{fe00}",'curarr;'=>"\u{21b7}",'curarrm;'=>"\u{293c}",'curlyeqprec;'=>"\u{22de}",'curlyeqsucc;'=>"\u{22df}",'curlyvee;'=>"\u{22ce}",'curlywedge;'=>"\u{22cf}",'curren'=>"\u{a4}",'curren;'=>"\u{a4}",'curvearrowleft;'=>"\u{21b6}",'curvearrowright;'=>"\u{21b7}",'cuvee;'=>"\u{22ce}",'cuwed;'=>"\u{22cf}",'cwconint;'=>"\u{2232}",'cwint;'=>"\u{2231}",'cylcty;'=>"\u{232d}",'dArr;'=>"\u{21d3}",'dHar;'=>"\u{2965}",'dagger;'=>"\u{2020}",'daleth;'=>"\u{2138}",'darr;'=>"\u{2193}",'dash;'=>"\u{2010}",'dashv;'=>"\u{22a3}",'dbkarow;'=>"\u{29
0f}",'dblac;'=>"\u{2dd}",'dcaron;'=>"\u{10f}",'dcy;'=>"\u{434}",'dd;'=>"\u{2146}",'ddagger;'=>"\u{2021}",'ddarr;'=>"\u{21ca}",'ddotseq;'=>"\u{2a77}",'deg'=>"\u{b0}",'deg;'=>"\u{b0}",'delta;'=>"\u{3b4}",'demptyv;'=>"\u{29b1}",'dfisht;'=>"\u{297f}",'dfr;'=>"\u{1d521}",'dharl;'=>"\u{21c3}",'dharr;'=>"\u{21c2}",'diam;'=>"\u{22c4}",'diamond;'=>"\u{22c4}",'diamondsuit;'=>"\u{2666}",'diams;'=>"\u{2666}",'die;'=>"\u{a8}",'digamma;'=>"\u{3dd}",'disin;'=>"\u{22f2}",'div;'=>"\u{f7}",'divide'=>"\u{f7}",'divide;'=>"\u{f7}",'divideontimes;'=>"\u{22c7}",'divonx;'=>"\u{22c7}",'djcy;'=>"\u{452}",'dlcorn;'=>"\u{231e}",'dlcrop;'=>"\u{230d}",'dollar;'=>"\u{24}",'dopf;'=>"\u{1d555}",'dot;'=>"\u{2d9}",'doteq;'=>"\u{2250}",'doteqdot;'=>"\u{2251}",'dotminus;'=>"\u{2238}",'dotplus;'=>"\u{2214}",'dotsquare;'=>"\u{22a1}",'doublebarwedge;'=>"\u{2306}",'downarrow;'=>"\u{2193}",'downdownarrows;'=>"\u{21ca}",'downharpoonleft;'=>"\u{21c3}",'downharpoonright;'=>"\u{21c2}",'drbkarow;'=>"\u{2910}",'drcorn;'=>"\u{231f}",'drcrop;'=>"\u{230c}",'dscr;'=>"\u{1d4b9}",'dscy;'=>"\u{455}",'dsol;'=>"\u{29f6}",'dstrok;'=>"\u{111}",'dtdot;'=>"\u{22f1}",'dtri;'=>"\u{25bf}",'dtrif;'=>"\u{25be}",'duarr;'=>"\u{21f5}",'duhar;'=>"\u{296f}",'dwangle;'=>"\u{29a6}",'dzcy;'=>"\u{45f}",'dzigrarr;'=>"\u{27ff}",'eDDot;'=>"\u{2a77}",'eDot;'=>"\u{2251}",'eacute'=>"\u{e9}",'eacute;'=>"\u{e9}",'easter;'=>"\u{2a6e}",'ecaron;'=>"\u{11b}",'ecir;'=>"\u{2256}",'ecirc'=>"\u{ea}",'ecirc;'=>"\u{ea}",'ecolon;'=>"\u{2255}",'ecy;'=>"\u{44d}",'edot;'=>"\u{117}",'ee;'=>"\u{2147}",'efDot;'=>"\u{2252}",'efr;'=>"\u{1d522}",'eg;'=>"\u{2a9a}",'egrave'=>"\u{e8}",'egrave;'=>"\u{e8}",'egs;'=>"\u{2a96}",'egsdot;'=>"\u{2a98}",'el;'=>"\u{2a99}",'elinters;'=>"\u{23e7}",'ell;'=>"\u{2113}",'els;'=>"\u{2a95}",'elsdot;'=>"\u{2a97}",'emacr;'=>"\u{113}",'empty;'=>"\u{2205}",'emptyset;'=>"\u{2205}",'emptyv;'=>"\u{2205}",'emsp13;'=>"\u{2004}",'emsp14;'=>"\u{2005}",'emsp;'=>"\u{2003}",'eng;'=>"\u{14b}",'ensp;'=>"\u{2002}",'eogon;'=>"\u{119}",'eopf;'=>"\u{1d556}"
,'epar;'=>"\u{22d5}",'eparsl;'=>"\u{29e3}",'eplus;'=>"\u{2a71}",'epsi;'=>"\u{3b5}",'epsilon;'=>"\u{3b5}",'epsiv;'=>"\u{3f5}",'eqcirc;'=>"\u{2256}",'eqcolon;'=>"\u{2255}",'eqsim;'=>"\u{2242}",'eqslantgtr;'=>"\u{2a96}",'eqslantless;'=>"\u{2a95}",'equals;'=>"\u{3d}",'equest;'=>"\u{225f}",'equiv;'=>"\u{2261}",'equivDD;'=>"\u{2a78}",'eqvparsl;'=>"\u{29e5}",'erDot;'=>"\u{2253}",'erarr;'=>"\u{2971}",'escr;'=>"\u{212f}",'esdot;'=>"\u{2250}",'esim;'=>"\u{2242}",'eta;'=>"\u{3b7}",'eth'=>"\u{f0}",'eth;'=>"\u{f0}",'euml'=>"\u{eb}",'euml;'=>"\u{eb}",'euro;'=>"\u{20ac}",'excl;'=>"\u{21}",'exist;'=>"\u{2203}",'expectation;'=>"\u{2130}",'exponentiale;'=>"\u{2147}",'fallingdotseq;'=>"\u{2252}",'fcy;'=>"\u{444}",'female;'=>"\u{2640}",'ffilig;'=>"\u{fb03}",'fflig;'=>"\u{fb00}",'ffllig;'=>"\u{fb04}",'ffr;'=>"\u{1d523}",'filig;'=>"\u{fb01}",'fjlig;'=>"\u{66}\u{6a}",'flat;'=>"\u{266d}",'fllig;'=>"\u{fb02}",'fltns;'=>"\u{25b1}",'fnof;'=>"\u{192}",'fopf;'=>"\u{1d557}",'forall;'=>"\u{2200}",'fork;'=>"\u{22d4}",'forkv;'=>"\u{2ad9}",'fpartint;'=>"\u{2a0d}",'frac12'=>"\u{bd}",'frac12;'=>"\u{bd}",'frac13;'=>"\u{2153}",'frac14'=>"\u{bc}",'frac14;'=>"\u{bc}",'frac15;'=>"\u{2155}",'frac16;'=>"\u{2159}",'frac18;'=>"\u{215b}",'frac23;'=>"\u{2154}",'frac25;'=>"\u{2156}",'frac34'=>"\u{be}",'frac34;'=>"\u{be}",'frac35;'=>"\u{2157}",'frac38;'=>"\u{215c}",'frac45;'=>"\u{2158}",'frac56;'=>"\u{215a}",'frac58;'=>"\u{215d}",'frac78;'=>"\u{215e}",'frasl;'=>"\u{2044}",'frown;'=>"\u{2322}",'fscr;'=>"\u{1d4bb}",'gE;'=>"\u{2267}",'gEl;'=>"\u{2a8c}",'gacute;'=>"\u{1f5}",'gamma;'=>"\u{3b3}",'gammad;'=>"\u{3dd}",'gap;'=>"\u{2a86}",'gbreve;'=>"\u{11f}",'gcirc;'=>"\u{11d}",'gcy;'=>"\u{433}",'gdot;'=>"\u{121}",'ge;'=>"\u{2265}",'gel;'=>"\u{22db}",'geq;'=>"\u{2265}",'geqq;'=>"\u{2267}",'geqslant;'=>"\u{2a7e}",'ges;'=>"\u{2a7e}",'gescc;'=>"\u{2aa9}",'gesdot;'=>"\u{2a80}",'gesdoto;'=>"\u{2a82}",'gesdotol;'=>"\u{2a84}",'gesl;'=>"\u{22db}\u{fe00}",'gesles;'=>"\u{2a94}",'gfr;'=>"\u{1d524}",'gg;'=>"\u{226b}",'ggg;'=>"\u{22d9}
",'gimel;'=>"\u{2137}",'gjcy;'=>"\u{453}",'gl;'=>"\u{2277}",'glE;'=>"\u{2a92}",'gla;'=>"\u{2aa5}",'glj;'=>"\u{2aa4}",'gnE;'=>"\u{2269}",'gnap;'=>"\u{2a8a}",'gnapprox;'=>"\u{2a8a}",'gne;'=>"\u{2a88}",'gneq;'=>"\u{2a88}",'gneqq;'=>"\u{2269}",'gnsim;'=>"\u{22e7}",'gopf;'=>"\u{1d558}",'grave;'=>"\u{60}",'gscr;'=>"\u{210a}",'gsim;'=>"\u{2273}",'gsime;'=>"\u{2a8e}",'gsiml;'=>"\u{2a90}",'gt'=>"\u{3e}",'gt;'=>"\u{3e}",'gtcc;'=>"\u{2aa7}",'gtcir;'=>"\u{2a7a}",'gtdot;'=>"\u{22d7}",'gtlPar;'=>"\u{2995}",'gtquest;'=>"\u{2a7c}",'gtrapprox;'=>"\u{2a86}",'gtrarr;'=>"\u{2978}",'gtrdot;'=>"\u{22d7}",'gtreqless;'=>"\u{22db}",'gtreqqless;'=>"\u{2a8c}",'gtrless;'=>"\u{2277}",'gtrsim;'=>"\u{2273}",'gvertneqq;'=>"\u{2269}\u{fe00}",'gvnE;'=>"\u{2269}\u{fe00}",'hArr;'=>"\u{21d4}",'hairsp;'=>"\u{200a}",'half;'=>"\u{bd}",'hamilt;'=>"\u{210b}",'hardcy;'=>"\u{44a}",'harr;'=>"\u{2194}",'harrcir;'=>"\u{2948}",'harrw;'=>"\u{21ad}",'hbar;'=>"\u{210f}",'hcirc;'=>"\u{125}",'hearts;'=>"\u{2665}",'heartsuit;'=>"\u{2665}",'hellip;'=>"\u{2026}",'hercon;'=>"\u{22b9}",'hfr;'=>"\u{1d525}",'hksearow;'=>"\u{2925}",'hkswarow;'=>"\u{2926}",'hoarr;'=>"\u{21ff}",'homtht;'=>"\u{223b}",'hookleftarrow;'=>"\u{21a9}",'hookrightarrow;'=>"\u{21aa}",'hopf;'=>"\u{1d559}",'horbar;'=>"\u{2015}",'hscr;'=>"\u{1d4bd}",'hslash;'=>"\u{210f}",'hstrok;'=>"\u{127}",'hybull;'=>"\u{2043}",'hyphen;'=>"\u{2010}",'iacute'=>"\u{ed}",'iacute;'=>"\u{ed}",'ic;'=>"\u{2063}",'icirc'=>"\u{ee}",'icirc;'=>"\u{ee}",'icy;'=>"\u{438}",'iecy;'=>"\u{435}",'iexcl'=>"\u{a1}",'iexcl;'=>"\u{a1}",'iff;'=>"\u{21d4}",'ifr;'=>"\u{1d526}",'igrave'=>"\u{ec}",'igrave;'=>"\u{ec}",'ii;'=>"\u{2148}",'iiiint;'=>"\u{2a0c}",'iiint;'=>"\u{222d}",'iinfin;'=>"\u{29dc}",'iiota;'=>"\u{2129}",'ijlig;'=>"\u{133}",'imacr;'=>"\u{12b}",'image;'=>"\u{2111}",'imagline;'=>"\u{2110}",'imagpart;'=>"\u{2111}",'imath;'=>"\u{131}",'imof;'=>"\u{22b7}",'imped;'=>"\u{1b5}",'in;'=>"\u{2208}",'incare;'=>"\u{2105}",'infin;'=>"\u{221e}",'infintie;'=>"\u{29dd}",'inodot;'=>"\u{131}",'int;'=>"
\u{222b}",'intcal;'=>"\u{22ba}",'integers;'=>"\u{2124}",'intercal;'=>"\u{22ba}",'intlarhk;'=>"\u{2a17}",'intprod;'=>"\u{2a3c}",'iocy;'=>"\u{451}",'iogon;'=>"\u{12f}",'iopf;'=>"\u{1d55a}",'iota;'=>"\u{3b9}",'iprod;'=>"\u{2a3c}",'iquest'=>"\u{bf}",'iquest;'=>"\u{bf}",'iscr;'=>"\u{1d4be}",'isin;'=>"\u{2208}",'isinE;'=>"\u{22f9}",'isindot;'=>"\u{22f5}",'isins;'=>"\u{22f4}",'isinsv;'=>"\u{22f3}",'isinv;'=>"\u{2208}",'it;'=>"\u{2062}",'itilde;'=>"\u{129}",'iukcy;'=>"\u{456}",'iuml'=>"\u{ef}",'iuml;'=>"\u{ef}",'jcirc;'=>"\u{135}",'jcy;'=>"\u{439}",'jfr;'=>"\u{1d527}",'jmath;'=>"\u{237}",'jopf;'=>"\u{1d55b}",'jscr;'=>"\u{1d4bf}",'jsercy;'=>"\u{458}",'jukcy;'=>"\u{454}",'kappa;'=>"\u{3ba}",'kappav;'=>"\u{3f0}",'kcedil;'=>"\u{137}",'kcy;'=>"\u{43a}",'kfr;'=>"\u{1d528}",'kgreen;'=>"\u{138}",'khcy;'=>"\u{445}",'kjcy;'=>"\u{45c}",'kopf;'=>"\u{1d55c}",'kscr;'=>"\u{1d4c0}",'lAarr;'=>"\u{21da}",'lArr;'=>"\u{21d0}",'lAtail;'=>"\u{291b}",'lBarr;'=>"\u{290e}",'lE;'=>"\u{2266}",'lEg;'=>"\u{2a8b}",'lHar;'=>"\u{2962}",'lacute;'=>"\u{13a}",'laemptyv;'=>"\u{29b4}",'lagran;'=>"\u{2112}",'lambda;'=>"\u{3bb}",'lang;'=>"\u{27e8}",'langd;'=>"\u{2991}",'langle;'=>"\u{27e8}",'lap;'=>"\u{2a85}",'laquo'=>"\u{ab}",'laquo;'=>"\u{ab}",'larr;'=>"\u{2190}",'larrb;'=>"\u{21e4}",'larrbfs;'=>"\u{291f}",'larrfs;'=>"\u{291d}",'larrhk;'=>"\u{21a9}",'larrlp;'=>"\u{21ab}",'larrpl;'=>"\u{2939}",'larrsim;'=>"\u{2973}",'larrtl;'=>"\u{21a2}",'lat;'=>"\u{2aab}",'latail;'=>"\u{2919}",'late;'=>"\u{2aad}",'lates;'=>"\u{2aad}\u{fe00}",'lbarr;'=>"\u{290c}",'lbbrk;'=>"\u{2772}",'lbrace;'=>"\u{7b}",'lbrack;'=>"\u{5b}",'lbrke;'=>"\u{298b}",'lbrksld;'=>"\u{298f}",'lbrkslu;'=>"\u{298d}",'lcaron;'=>"\u{13e}",'lcedil;'=>"\u{13c}",'lceil;'=>"\u{2308}",'lcub;'=>"\u{7b}",'lcy;'=>"\u{43b}",'ldca;'=>"\u{2936}",'ldquo;'=>"\u{201c}",'ldquor;'=>"\u{201e}",'ldrdhar;'=>"\u{2967}",'ldrushar;'=>"\u{294b}",'ldsh;'=>"\u{21b2}",'le;'=>"\u{2264}",'leftarrow;'=>"\u{2190}",'leftarrowtail;'=>"\u{21a2}",'leftharpoondown;'=>"\u{21bd}",'leftharpoonu
p;'=>"\u{21bc}",'leftleftarrows;'=>"\u{21c7}",'leftrightarrow;'=>"\u{2194}",'leftrightarrows;'=>"\u{21c6}",'leftrightharpoons;'=>"\u{21cb}",'leftrightsquigarrow;'=>"\u{21ad}",'leftthreetimes;'=>"\u{22cb}",'leg;'=>"\u{22da}",'leq;'=>"\u{2264}",'leqq;'=>"\u{2266}",'leqslant;'=>"\u{2a7d}",'les;'=>"\u{2a7d}",'lescc;'=>"\u{2aa8}",'lesdot;'=>"\u{2a7f}",'lesdoto;'=>"\u{2a81}",'lesdotor;'=>"\u{2a83}",'lesg;'=>"\u{22da}\u{fe00}",'lesges;'=>"\u{2a93}",'lessapprox;'=>"\u{2a85}",'lessdot;'=>"\u{22d6}",'lesseqgtr;'=>"\u{22da}",'lesseqqgtr;'=>"\u{2a8b}",'lessgtr;'=>"\u{2276}",'lesssim;'=>"\u{2272}",'lfisht;'=>"\u{297c}",'lfloor;'=>"\u{230a}",'lfr;'=>"\u{1d529}",'lg;'=>"\u{2276}",'lgE;'=>"\u{2a91}",'lhard;'=>"\u{21bd}",'lharu;'=>"\u{21bc}",'lharul;'=>"\u{296a}",'lhblk;'=>"\u{2584}",'ljcy;'=>"\u{459}",'ll;'=>"\u{226a}",'llarr;'=>"\u{21c7}",'llcorner;'=>"\u{231e}",'llhard;'=>"\u{296b}",'lltri;'=>"\u{25fa}",'lmidot;'=>"\u{140}",'lmoust;'=>"\u{23b0}",'lmoustache;'=>"\u{23b0}",'lnE;'=>"\u{2268}",'lnap;'=>"\u{2a89}",'lnapprox;'=>"\u{2a89}",'lne;'=>"\u{2a87}",'lneq;'=>"\u{2a87}",'lneqq;'=>"\u{2268}",'lnsim;'=>"\u{22e6}",'loang;'=>"\u{27ec}",'loarr;'=>"\u{21fd}",'lobrk;'=>"\u{27e6}",'longleftarrow;'=>"\u{27f5}",'longleftrightarrow;'=>"\u{27f7}",'longmapsto;'=>"\u{27fc}",'longrightarrow;'=>"\u{27f6}",'looparrowleft;'=>"\u{21ab}",'looparrowright;'=>"\u{21ac}",'lopar;'=>"\u{2985}",'lopf;'=>"\u{1d55d}",'loplus;'=>"\u{2a2d}",'lotimes;'=>"\u{2a34}",'lowast;'=>"\u{2217}",'lowbar;'=>"\u{5f}",'loz;'=>"\u{25ca}",'lozenge;'=>"\u{25ca}",'lozf;'=>"\u{29eb}",'lpar;'=>"\u{28}",'lparlt;'=>"\u{2993}",'lrarr;'=>"\u{21c6}",'lrcorner;'=>"\u{231f}",'lrhar;'=>"\u{21cb}",'lrhard;'=>"\u{296d}",'lrm;'=>"\u{200e}",'lrtri;'=>"\u{22bf}",'lsaquo;'=>"\u{2039}",'lscr;'=>"\u{1d4c1}",'lsh;'=>"\u{21b0}",'lsim;'=>"\u{2272}",'lsime;'=>"\u{2a8d}",'lsimg;'=>"\u{2a8f}",'lsqb;'=>"\u{5b}",'lsquo;'=>"\u{2018}",'lsquor;'=>"\u{201a}",'lstrok;'=>"\u{142}",'lt'=>"\u{3c}",'lt;'=>"\u{3c}",'ltcc;'=>"\u{2aa6}",'ltcir;'=>"\u{2a79}",'ltdot
;'=>"\u{22d6}",'lthree;'=>"\u{22cb}",'ltimes;'=>"\u{22c9}",'ltlarr;'=>"\u{2976}",'ltquest;'=>"\u{2a7b}",'ltrPar;'=>"\u{2996}",'ltri;'=>"\u{25c3}",'ltrie;'=>"\u{22b4}",'ltrif;'=>"\u{25c2}",'lurdshar;'=>"\u{294a}",'luruhar;'=>"\u{2966}",'lvertneqq;'=>"\u{2268}\u{fe00}",'lvnE;'=>"\u{2268}\u{fe00}",'mDDot;'=>"\u{223a}",'macr'=>"\u{af}",'macr;'=>"\u{af}",'male;'=>"\u{2642}",'malt;'=>"\u{2720}",'maltese;'=>"\u{2720}",'map;'=>"\u{21a6}",'mapsto;'=>"\u{21a6}",'mapstodown;'=>"\u{21a7}",'mapstoleft;'=>"\u{21a4}",'mapstoup;'=>"\u{21a5}",'marker;'=>"\u{25ae}",'mcomma;'=>"\u{2a29}",'mcy;'=>"\u{43c}",'mdash;'=>"\u{2014}",'measuredangle;'=>"\u{2221}",'mfr;'=>"\u{1d52a}",'mho;'=>"\u{2127}",'micro'=>"\u{b5}",'micro;'=>"\u{b5}",'mid;'=>"\u{2223}",'midast;'=>"\u{2a}",'midcir;'=>"\u{2af0}",'middot'=>"\u{b7}",'middot;'=>"\u{b7}",'minus;'=>"\u{2212}",'minusb;'=>"\u{229f}",'minusd;'=>"\u{2238}",'minusdu;'=>"\u{2a2a}",'mlcp;'=>"\u{2adb}",'mldr;'=>"\u{2026}",'mnplus;'=>"\u{2213}",'models;'=>"\u{22a7}",'mopf;'=>"\u{1d55e}",'mp;'=>"\u{2213}",'mscr;'=>"\u{1d4c2}",'mstpos;'=>"\u{223e}",'mu;'=>"\u{3bc}",'multimap;'=>"\u{22b8}",'mumap;'=>"\u{22b8}",'nGg;'=>"\u{22d9}\u{338}",'nGt;'=>"\u{226b}\u{20d2}",'nGtv;'=>"\u{226b}\u{338}",'nLeftarrow;'=>"\u{21cd}",'nLeftrightarrow;'=>"\u{21ce}",'nLl;'=>"\u{22d8}\u{338}",'nLt;'=>"\u{226a}\u{20d2}",'nLtv;'=>"\u{226a}\u{338}",'nRightarrow;'=>"\u{21cf}",'nVDash;'=>"\u{22af}",'nVdash;'=>"\u{22ae}",'nabla;'=>"\u{2207}",'nacute;'=>"\u{144}",'nang;'=>"\u{2220}\u{20d2}",'nap;'=>"\u{2249}",'napE;'=>"\u{2a70}\u{338}",'napid;'=>"\u{224b}\u{338}",'napos;'=>"\u{149}",'napprox;'=>"\u{2249}",'natur;'=>"\u{266e}",'natural;'=>"\u{266e}",'naturals;'=>"\u{2115}",'nbsp'=>"\u{a0}",'nbsp;'=>"\u{a0}",'nbump;'=>"\u{224e}\u{338}",'nbumpe;'=>"\u{224f}\u{338}",'ncap;'=>"\u{2a43}",'ncaron;'=>"\u{148}",'ncedil;'=>"\u{146}",'ncong;'=>"\u{2247}",'ncongdot;'=>"\u{2a6d}\u{338}",'ncup;'=>"\u{2a42}",'ncy;'=>"\u{43d}",'ndash;'=>"\u{2013}",'ne;'=>"\u{2260}",'neArr;'=>"\u{21d7}",'nearhk;'=>"\u{29
24}",'nearr;'=>"\u{2197}",'nearrow;'=>"\u{2197}",'nedot;'=>"\u{2250}\u{338}",'nequiv;'=>"\u{2262}",'nesear;'=>"\u{2928}",'nesim;'=>"\u{2242}\u{338}",'nexist;'=>"\u{2204}",'nexists;'=>"\u{2204}",'nfr;'=>"\u{1d52b}",'ngE;'=>"\u{2267}\u{338}",'nge;'=>"\u{2271}",'ngeq;'=>"\u{2271}",'ngeqq;'=>"\u{2267}\u{338}",'ngeqslant;'=>"\u{2a7e}\u{338}",'nges;'=>"\u{2a7e}\u{338}",'ngsim;'=>"\u{2275}",'ngt;'=>"\u{226f}",'ngtr;'=>"\u{226f}",'nhArr;'=>"\u{21ce}",'nharr;'=>"\u{21ae}",'nhpar;'=>"\u{2af2}",'ni;'=>"\u{220b}",'nis;'=>"\u{22fc}",'nisd;'=>"\u{22fa}",'niv;'=>"\u{220b}",'njcy;'=>"\u{45a}",'nlArr;'=>"\u{21cd}",'nlE;'=>"\u{2266}\u{338}",'nlarr;'=>"\u{219a}",'nldr;'=>"\u{2025}",'nle;'=>"\u{2270}",'nleftarrow;'=>"\u{219a}",'nleftrightarrow;'=>"\u{21ae}",'nleq;'=>"\u{2270}",'nleqq;'=>"\u{2266}\u{338}",'nleqslant;'=>"\u{2a7d}\u{338}",'nles;'=>"\u{2a7d}\u{338}",'nless;'=>"\u{226e}",'nlsim;'=>"\u{2274}",'nlt;'=>"\u{226e}",'nltri;'=>"\u{22ea}",'nltrie;'=>"\u{22ec}",'nmid;'=>"\u{2224}",'nopf;'=>"\u{1d55f}",'not'=>"\u{ac}",'not;'=>"\u{ac}",'notin;'=>"\u{2209}",'notinE;'=>"\u{22f9}\u{338}",'notindot;'=>"\u{22f5}\u{338}",'notinva;'=>"\u{2209}",'notinvb;'=>"\u{22f7}",'notinvc;'=>"\u{22f6}",'notni;'=>"\u{220c}",'notniva;'=>"\u{220c}",'notnivb;'=>"\u{22fe}",'notnivc;'=>"\u{22fd}",'npar;'=>"\u{2226}",'nparallel;'=>"\u{2226}",'nparsl;'=>"\u{2afd}\u{20e5}",'npart;'=>"\u{2202}\u{338}",'npolint;'=>"\u{2a14}",'npr;'=>"\u{2280}",'nprcue;'=>"\u{22e0}",'npre;'=>"\u{2aaf}\u{338}",'nprec;'=>"\u{2280}",'npreceq;'=>"\u{2aaf}\u{338}",'nrArr;'=>"\u{21cf}",'nrarr;'=>"\u{219b}",'nrarrc;'=>"\u{2933}\u{338}",'nrarrw;'=>"\u{219d}\u{338}",'nrightarrow;'=>"\u{219b}",'nrtri;'=>"\u{22eb}",'nrtrie;'=>"\u{22ed}",'nsc;'=>"\u{2281}",'nsccue;'=>"\u{22e1}",'nsce;'=>"\u{2ab0}\u{338}",'nscr;'=>"\u{1d4c3}",'nshortmid;'=>"\u{2224}",'nshortparallel;'=>"\u{2226}",'nsim;'=>"\u{2241}",'nsime;'=>"\u{2244}",'nsimeq;'=>"\u{2244}",'nsmid;'=>"\u{2224}",'nspar;'=>"\u{2226}",'nsqsube;'=>"\u{22e2}",'nsqsupe;'=>"\u{22e3}",'nsub;'=>"\u{2284
}",'nsubE;'=>"\u{2ac5}\u{338}",'nsube;'=>"\u{2288}",'nsubset;'=>"\u{2282}\u{20d2}",'nsubseteq;'=>"\u{2288}",'nsubseteqq;'=>"\u{2ac5}\u{338}",'nsucc;'=>"\u{2281}",'nsucceq;'=>"\u{2ab0}\u{338}",'nsup;'=>"\u{2285}",'nsupE;'=>"\u{2ac6}\u{338}",'nsupe;'=>"\u{2289}",'nsupset;'=>"\u{2283}\u{20d2}",'nsupseteq;'=>"\u{2289}",'nsupseteqq;'=>"\u{2ac6}\u{338}",'ntgl;'=>"\u{2279}",'ntilde'=>"\u{f1}",'ntilde;'=>"\u{f1}",'ntlg;'=>"\u{2278}",'ntriangleleft;'=>"\u{22ea}",'ntrianglelefteq;'=>"\u{22ec}",'ntriangleright;'=>"\u{22eb}",'ntrianglerighteq;'=>"\u{22ed}",'nu;'=>"\u{3bd}",'num;'=>"\u{23}",'numero;'=>"\u{2116}",'numsp;'=>"\u{2007}",'nvDash;'=>"\u{22ad}",'nvHarr;'=>"\u{2904}",'nvap;'=>"\u{224d}\u{20d2}",'nvdash;'=>"\u{22ac}",'nvge;'=>"\u{2265}\u{20d2}",'nvgt;'=>"\u{3e}\u{20d2}",'nvinfin;'=>"\u{29de}",'nvlArr;'=>"\u{2902}",'nvle;'=>"\u{2264}\u{20d2}",'nvlt;'=>"\u{3c}\u{20d2}",'nvltrie;'=>"\u{22b4}\u{20d2}",'nvrArr;'=>"\u{2903}",'nvrtrie;'=>"\u{22b5}\u{20d2}",'nvsim;'=>"\u{223c}\u{20d2}",'nwArr;'=>"\u{21d6}",'nwarhk;'=>"\u{2923}",'nwarr;'=>"\u{2196}",'nwarrow;'=>"\u{2196}",'nwnear;'=>"\u{2927}",'oS;'=>"\u{24c8}",'oacute'=>"\u{f3}",'oacute;'=>"\u{f3}",'oast;'=>"\u{229b}",'ocir;'=>"\u{229a}",'ocirc'=>"\u{f4}",'ocirc;'=>"\u{f4}",'ocy;'=>"\u{43e}",'odash;'=>"\u{229d}",'odblac;'=>"\u{151}",'odiv;'=>"\u{2a38}",'odot;'=>"\u{2299}",'odsold;'=>"\u{29bc}",'oelig;'=>"\u{153}",'ofcir;'=>"\u{29bf}",'ofr;'=>"\u{1d52c}",'ogon;'=>"\u{2db}",'ograve'=>"\u{f2}",'ograve;'=>"\u{f2}",'ogt;'=>"\u{29c1}",'ohbar;'=>"\u{29b5}",'ohm;'=>"\u{3a9}",'oint;'=>"\u{222e}",'olarr;'=>"\u{21ba}",'olcir;'=>"\u{29be}",'olcross;'=>"\u{29bb}",'oline;'=>"\u{203e}",'olt;'=>"\u{29c0}",'omacr;'=>"\u{14d}",'omega;'=>"\u{3c9}",'omicron;'=>"\u{3bf}",'omid;'=>"\u{29b6}",'ominus;'=>"\u{2296}",'oopf;'=>"\u{1d560}",'opar;'=>"\u{29b7}",'operp;'=>"\u{29b9}",'oplus;'=>"\u{2295}",'or;'=>"\u{2228}",'orarr;'=>"\u{21bb}",'ord;'=>"\u{2a5d}",'order;'=>"\u{2134}",'orderof;'=>"\u{2134}",'ordf'=>"\u{aa}",'ordf;'=>"\u{aa}",'ordm'=>"\u{ba}",'ord
m;'=>"\u{ba}",'origof;'=>"\u{22b6}",'oror;'=>"\u{2a56}",'orslope;'=>"\u{2a57}",'orv;'=>"\u{2a5b}",'oscr;'=>"\u{2134}",'oslash'=>"\u{f8}",'oslash;'=>"\u{f8}",'osol;'=>"\u{2298}",'otilde'=>"\u{f5}",'otilde;'=>"\u{f5}",'otimes;'=>"\u{2297}",'otimesas;'=>"\u{2a36}",'ouml'=>"\u{f6}",'ouml;'=>"\u{f6}",'ovbar;'=>"\u{233d}",'par;'=>"\u{2225}",'para'=>"\u{b6}",'para;'=>"\u{b6}",'parallel;'=>"\u{2225}",'parsim;'=>"\u{2af3}",'parsl;'=>"\u{2afd}",'part;'=>"\u{2202}",'pcy;'=>"\u{43f}",'percnt;'=>"\u{25}",'period;'=>"\u{2e}",'permil;'=>"\u{2030}",'perp;'=>"\u{22a5}",'pertenk;'=>"\u{2031}",'pfr;'=>"\u{1d52d}",'phi;'=>"\u{3c6}",'phiv;'=>"\u{3d5}",'phmmat;'=>"\u{2133}",'phone;'=>"\u{260e}",'pi;'=>"\u{3c0}",'pitchfork;'=>"\u{22d4}",'piv;'=>"\u{3d6}",'planck;'=>"\u{210f}",'planckh;'=>"\u{210e}",'plankv;'=>"\u{210f}",'plus;'=>"\u{2b}",'plusacir;'=>"\u{2a23}",'plusb;'=>"\u{229e}",'pluscir;'=>"\u{2a22}",'plusdo;'=>"\u{2214}",'plusdu;'=>"\u{2a25}",'pluse;'=>"\u{2a72}",'plusmn'=>"\u{b1}",'plusmn;'=>"\u{b1}",'plussim;'=>"\u{2a26}",'plustwo;'=>"\u{2a27}",'pm;'=>"\u{b1}",'pointint;'=>"\u{2a15}",'popf;'=>"\u{1d561}",'pound'=>"\u{a3}",'pound;'=>"\u{a3}",'pr;'=>"\u{227a}",'prE;'=>"\u{2ab3}",'prap;'=>"\u{2ab7}",'prcue;'=>"\u{227c}",'pre;'=>"\u{2aaf}",'prec;'=>"\u{227a}",'precapprox;'=>"\u{2ab7}",'preccurlyeq;'=>"\u{227c}",'preceq;'=>"\u{2aaf}",'precnapprox;'=>"\u{2ab9}",'precneqq;'=>"\u{2ab5}",'precnsim;'=>"\u{22e8}",'precsim;'=>"\u{227e}",'prime;'=>"\u{2032}",'primes;'=>"\u{2119}",'prnE;'=>"\u{2ab5}",'prnap;'=>"\u{2ab9}",'prnsim;'=>"\u{22e8}",'prod;'=>"\u{220f}",'profalar;'=>"\u{232e}",'profline;'=>"\u{2312}",'profsurf;'=>"\u{2313}",'prop;'=>"\u{221d}",'propto;'=>"\u{221d}",'prsim;'=>"\u{227e}",'prurel;'=>"\u{22b0}",'pscr;'=>"\u{1d4c5}",'psi;'=>"\u{3c8}",'puncsp;'=>"\u{2008}",'qfr;'=>"\u{1d52e}",'qint;'=>"\u{2a0c}",'qopf;'=>"\u{1d562}",'qprime;'=>"\u{2057}",'qscr;'=>"\u{1d4c6}",'quaternions;'=>"\u{210d}",'quatint;'=>"\u{2a16}",'quest;'=>"\u{3f}",'questeq;'=>"\u{225f}",'quot'=>"\u{22}",'quot;'=>"
\u{22}",'rAarr;'=>"\u{21db}",'rArr;'=>"\u{21d2}",'rAtail;'=>"\u{291c}",'rBarr;'=>"\u{290f}",'rHar;'=>"\u{2964}",'race;'=>"\u{223d}\u{331}",'racute;'=>"\u{155}",'radic;'=>"\u{221a}",'raemptyv;'=>"\u{29b3}",'rang;'=>"\u{27e9}",'rangd;'=>"\u{2992}",'range;'=>"\u{29a5}",'rangle;'=>"\u{27e9}",'raquo'=>"\u{bb}",'raquo;'=>"\u{bb}",'rarr;'=>"\u{2192}",'rarrap;'=>"\u{2975}",'rarrb;'=>"\u{21e5}",'rarrbfs;'=>"\u{2920}",'rarrc;'=>"\u{2933}",'rarrfs;'=>"\u{291e}",'rarrhk;'=>"\u{21aa}",'rarrlp;'=>"\u{21ac}",'rarrpl;'=>"\u{2945}",'rarrsim;'=>"\u{2974}",'rarrtl;'=>"\u{21a3}",'rarrw;'=>"\u{219d}",'ratail;'=>"\u{291a}",'ratio;'=>"\u{2236}",'rationals;'=>"\u{211a}",'rbarr;'=>"\u{290d}",'rbbrk;'=>"\u{2773}",'rbrace;'=>"\u{7d}",'rbrack;'=>"\u{5d}",'rbrke;'=>"\u{298c}",'rbrksld;'=>"\u{298e}",'rbrkslu;'=>"\u{2990}",'rcaron;'=>"\u{159}",'rcedil;'=>"\u{157}",'rceil;'=>"\u{2309}",'rcub;'=>"\u{7d}",'rcy;'=>"\u{440}",'rdca;'=>"\u{2937}",'rdldhar;'=>"\u{2969}",'rdquo;'=>"\u{201d}",'rdquor;'=>"\u{201d}",'rdsh;'=>"\u{21b3}",'real;'=>"\u{211c}",'realine;'=>"\u{211b}",'realpart;'=>"\u{211c}",'reals;'=>"\u{211d}",'rect;'=>"\u{25ad}",'reg'=>"\u{ae}",'reg;'=>"\u{ae}",'rfisht;'=>"\u{297d}",'rfloor;'=>"\u{230b}",'rfr;'=>"\u{1d52f}",'rhard;'=>"\u{21c1}",'rharu;'=>"\u{21c0}",'rharul;'=>"\u{296c}",'rho;'=>"\u{3c1}",'rhov;'=>"\u{3f1}",'rightarrow;'=>"\u{2192}",'rightarrowtail;'=>"\u{21a3}",'rightharpoondown;'=>"\u{21c1}",'rightharpoonup;'=>"\u{21c0}",'rightleftarrows;'=>"\u{21c4}",'rightleftharpoons;'=>"\u{21cc}",'rightrightarrows;'=>"\u{21c9}",'rightsquigarrow;'=>"\u{219d}",'rightthreetimes;'=>"\u{22cc}",'ring;'=>"\u{2da}",'risingdotseq;'=>"\u{2253}",'rlarr;'=>"\u{21c4}",'rlhar;'=>"\u{21cc}",'rlm;'=>"\u{200f}",'rmoust;'=>"\u{23b1}",'rmoustache;'=>"\u{23b1}",'rnmid;'=>"\u{2aee}",'roang;'=>"\u{27ed}",'roarr;'=>"\u{21fe}",'robrk;'=>"\u{27e7}",'ropar;'=>"\u{2986}",'ropf;'=>"\u{1d563}",'roplus;'=>"\u{2a2e}",'rotimes;'=>"\u{2a35}",'rpar;'=>"\u{29}",'rpargt;'=>"\u{2994}",'rppolint;'=>"\u{2a12}",'rrarr;'=>"\u{21c9
}",'rsaquo;'=>"\u{203a}",'rscr;'=>"\u{1d4c7}",'rsh;'=>"\u{21b1}",'rsqb;'=>"\u{5d}",'rsquo;'=>"\u{2019}",'rsquor;'=>"\u{2019}",'rthree;'=>"\u{22cc}",'rtimes;'=>"\u{22ca}",'rtri;'=>"\u{25b9}",'rtrie;'=>"\u{22b5}",'rtrif;'=>"\u{25b8}",'rtriltri;'=>"\u{29ce}",'ruluhar;'=>"\u{2968}",'rx;'=>"\u{211e}",'sacute;'=>"\u{15b}",'sbquo;'=>"\u{201a}",'sc;'=>"\u{227b}",'scE;'=>"\u{2ab4}",'scap;'=>"\u{2ab8}",'scaron;'=>"\u{161}",'sccue;'=>"\u{227d}",'sce;'=>"\u{2ab0}",'scedil;'=>"\u{15f}",'scirc;'=>"\u{15d}",'scnE;'=>"\u{2ab6}",'scnap;'=>"\u{2aba}",'scnsim;'=>"\u{22e9}",'scpolint;'=>"\u{2a13}",'scsim;'=>"\u{227f}",'scy;'=>"\u{441}",'sdot;'=>"\u{22c5}",'sdotb;'=>"\u{22a1}",'sdote;'=>"\u{2a66}",'seArr;'=>"\u{21d8}",'searhk;'=>"\u{2925}",'searr;'=>"\u{2198}",'searrow;'=>"\u{2198}",'sect'=>"\u{a7}",'sect;'=>"\u{a7}",'semi;'=>"\u{3b}",'seswar;'=>"\u{2929}",'setminus;'=>"\u{2216}",'setmn;'=>"\u{2216}",'sext;'=>"\u{2736}",'sfr;'=>"\u{1d530}",'sfrown;'=>"\u{2322}",'sharp;'=>"\u{266f}",'shchcy;'=>"\u{449}",'shcy;'=>"\u{448}",'shortmid;'=>"\u{2223}",'shortparallel;'=>"\u{2225}",'shy'=>"\u{ad}",'shy;'=>"\u{ad}",'sigma;'=>"\u{3c3}",'sigmaf;'=>"\u{3c2}",'sigmav;'=>"\u{3c2}",'sim;'=>"\u{223c}",'simdot;'=>"\u{2a6a}",'sime;'=>"\u{2243}",'simeq;'=>"\u{2243}",'simg;'=>"\u{2a9e}",'simgE;'=>"\u{2aa0}",'siml;'=>"\u{2a9d}",'simlE;'=>"\u{2a9f}",'simne;'=>"\u{2246}",'simplus;'=>"\u{2a24}",'simrarr;'=>"\u{2972}",'slarr;'=>"\u{2190}",'smallsetminus;'=>"\u{2216}",'smashp;'=>"\u{2a33}",'smeparsl;'=>"\u{29e4}",'smid;'=>"\u{2223}",'smile;'=>"\u{2323}",'smt;'=>"\u{2aaa}",'smte;'=>"\u{2aac}",'smtes;'=>"\u{2aac}\u{fe00}",'softcy;'=>"\u{44c}",'sol;'=>"\u{2f}",'solb;'=>"\u{29c4}",'solbar;'=>"\u{233f}",'sopf;'=>"\u{1d564}",'spades;'=>"\u{2660}",'spadesuit;'=>"\u{2660}",'spar;'=>"\u{2225}",'sqcap;'=>"\u{2293}",'sqcaps;'=>"\u{2293}\u{fe00}",'sqcup;'=>"\u{2294}",'sqcups;'=>"\u{2294}\u{fe00}",'sqsub;'=>"\u{228f}",'sqsube;'=>"\u{2291}",'sqsubset;'=>"\u{228f}",'sqsubseteq;'=>"\u{2291}",'sqsup;'=>"\u{2290}",'sqsupe;'=>"\u{2
292}",'sqsupset;'=>"\u{2290}",'sqsupseteq;'=>"\u{2292}",'squ;'=>"\u{25a1}",'square;'=>"\u{25a1}",'squarf;'=>"\u{25aa}",'squf;'=>"\u{25aa}",'srarr;'=>"\u{2192}",'sscr;'=>"\u{1d4c8}",'ssetmn;'=>"\u{2216}",'ssmile;'=>"\u{2323}",'sstarf;'=>"\u{22c6}",'star;'=>"\u{2606}",'starf;'=>"\u{2605}",'straightepsilon;'=>"\u{3f5}",'straightphi;'=>"\u{3d5}",'strns;'=>"\u{af}",'sub;'=>"\u{2282}",'subE;'=>"\u{2ac5}",'subdot;'=>"\u{2abd}",'sube;'=>"\u{2286}",'subedot;'=>"\u{2ac3}",'submult;'=>"\u{2ac1}",'subnE;'=>"\u{2acb}",'subne;'=>"\u{228a}",'subplus;'=>"\u{2abf}",'subrarr;'=>"\u{2979}",'subset;'=>"\u{2282}",'subseteq;'=>"\u{2286}",'subseteqq;'=>"\u{2ac5}",'subsetneq;'=>"\u{228a}",'subsetneqq;'=>"\u{2acb}",'subsim;'=>"\u{2ac7}",'subsub;'=>"\u{2ad5}",'subsup;'=>"\u{2ad3}",'succ;'=>"\u{227b}",'succapprox;'=>"\u{2ab8}",'succcurlyeq;'=>"\u{227d}",'succeq;'=>"\u{2ab0}",'succnapprox;'=>"\u{2aba}",'succneqq;'=>"\u{2ab6}",'succnsim;'=>"\u{22e9}",'succsim;'=>"\u{227f}",'sum;'=>"\u{2211}",'sung;'=>"\u{266a}",'sup1'=>"\u{b9}",'sup1;'=>"\u{b9}",'sup2'=>"\u{b2}",'sup2;'=>"\u{b2}",'sup3'=>"\u{b3}",'sup3;'=>"\u{b3}",'sup;'=>"\u{2283}",'supE;'=>"\u{2ac6}",'supdot;'=>"\u{2abe}",'supdsub;'=>"\u{2ad8}",'supe;'=>"\u{2287}",'supedot;'=>"\u{2ac4}",'suphsol;'=>"\u{27c9}",'suphsub;'=>"\u{2ad7}",'suplarr;'=>"\u{297b}",'supmult;'=>"\u{2ac2}",'supnE;'=>"\u{2acc}",'supne;'=>"\u{228b}",'supplus;'=>"\u{2ac0}",'supset;'=>"\u{2283}",'supseteq;'=>"\u{2287}",'supseteqq;'=>"\u{2ac6}",'supsetneq;'=>"\u{228b}",'supsetneqq;'=>"\u{2acc}",'supsim;'=>"\u{2ac8}",'supsub;'=>"\u{2ad4}",'supsup;'=>"\u{2ad6}",'swArr;'=>"\u{21d9}",'swarhk;'=>"\u{2926}",'swarr;'=>"\u{2199}",'swarrow;'=>"\u{2199}",'swnwar;'=>"\u{292a}",'szlig'=>"\u{df}",'szlig;'=>"\u{df}",'target;'=>"\u{2316}",'tau;'=>"\u{3c4}",'tbrk;'=>"\u{23b4}",'tcaron;'=>"\u{165}",'tcedil;'=>"\u{163}",'tcy;'=>"\u{442}",'tdot;'=>"\u{20db}",'telrec;'=>"\u{2315}",'tfr;'=>"\u{1d531}",'there4;'=>"\u{2234}",'therefore;'=>"\u{2234}",'theta;'=>"\u{3b8}",'thetasym;'=>"\u{3d1}",'thetav
;'=>"\u{3d1}",'thickapprox;'=>"\u{2248}",'thicksim;'=>"\u{223c}",'thinsp;'=>"\u{2009}",'thkap;'=>"\u{2248}",'thksim;'=>"\u{223c}",'thorn'=>"\u{fe}",'thorn;'=>"\u{fe}",'tilde;'=>"\u{2dc}",'times'=>"\u{d7}",'times;'=>"\u{d7}",'timesb;'=>"\u{22a0}",'timesbar;'=>"\u{2a31}",'timesd;'=>"\u{2a30}",'tint;'=>"\u{222d}",'toea;'=>"\u{2928}",'top;'=>"\u{22a4}",'topbot;'=>"\u{2336}",'topcir;'=>"\u{2af1}",'topf;'=>"\u{1d565}",'topfork;'=>"\u{2ada}",'tosa;'=>"\u{2929}",'tprime;'=>"\u{2034}",'trade;'=>"\u{2122}",'triangle;'=>"\u{25b5}",'triangledown;'=>"\u{25bf}",'triangleleft;'=>"\u{25c3}",'trianglelefteq;'=>"\u{22b4}",'triangleq;'=>"\u{225c}",'triangleright;'=>"\u{25b9}",'trianglerighteq;'=>"\u{22b5}",'tridot;'=>"\u{25ec}",'trie;'=>"\u{225c}",'triminus;'=>"\u{2a3a}",'triplus;'=>"\u{2a39}",'trisb;'=>"\u{29cd}",'tritime;'=>"\u{2a3b}",'trpezium;'=>"\u{23e2}",'tscr;'=>"\u{1d4c9}",'tscy;'=>"\u{446}",'tshcy;'=>"\u{45b}",'tstrok;'=>"\u{167}",'twixt;'=>"\u{226c}",'twoheadleftarrow;'=>"\u{219e}",'twoheadrightarrow;'=>"\u{21a0}",'uArr;'=>"\u{21d1}",'uHar;'=>"\u{2963}",'uacute'=>"\u{fa}",'uacute;'=>"\u{fa}",'uarr;'=>"\u{2191}",'ubrcy;'=>"\u{45e}",'ubreve;'=>"\u{16d}",'ucirc'=>"\u{fb}",'ucirc;'=>"\u{fb}",'ucy;'=>"\u{443}",'udarr;'=>"\u{21c5}",'udblac;'=>"\u{171}",'udhar;'=>"\u{296e}",'ufisht;'=>"\u{297e}",'ufr;'=>"\u{1d532}",'ugrave'=>"\u{f9}",'ugrave;'=>"\u{f9}",'uharl;'=>"\u{21bf}",'uharr;'=>"\u{21be}",'uhblk;'=>"\u{2580}",'ulcorn;'=>"\u{231c}",'ulcorner;'=>"\u{231c}",'ulcrop;'=>"\u{230f}",'ultri;'=>"\u{25f8}",'umacr;'=>"\u{16b}",'uml'=>"\u{a8}",'uml;'=>"\u{a8}",'uogon;'=>"\u{173}",'uopf;'=>"\u{1d566}",'uparrow;'=>"\u{2191}",'updownarrow;'=>"\u{2195}",'upharpoonleft;'=>"\u{21bf}",'upharpoonright;'=>"\u{21be}",'uplus;'=>"\u{228e}",'upsi;'=>"\u{3c5}",'upsih;'=>"\u{3d2}",'upsilon;'=>"\u{3c5}",'upuparrows;'=>"\u{21c8}",'urcorn;'=>"\u{231d}",'urcorner;'=>"\u{231d}",'urcrop;'=>"\u{230e}",'uring;'=>"\u{16f}",'urtri;'=>"\u{25f9}",'uscr;'=>"\u{1d4ca}",'utdot;'=>"\u{22f0}",'utilde;'=>"\u{169}",'utri
;'=>"\u{25b5}",'utrif;'=>"\u{25b4}",'uuarr;'=>"\u{21c8}",'uuml'=>"\u{fc}",'uuml;'=>"\u{fc}",'uwangle;'=>"\u{29a7}",'vArr;'=>"\u{21d5}",'vBar;'=>"\u{2ae8}",'vBarv;'=>"\u{2ae9}",'vDash;'=>"\u{22a8}",'vangrt;'=>"\u{299c}",'varepsilon;'=>"\u{3f5}",'varkappa;'=>"\u{3f0}",'varnothing;'=>"\u{2205}",'varphi;'=>"\u{3d5}",'varpi;'=>"\u{3d6}",'varpropto;'=>"\u{221d}",'varr;'=>"\u{2195}",'varrho;'=>"\u{3f1}",'varsigma;'=>"\u{3c2}",'varsubsetneq;'=>"\u{228a}\u{fe00}",'varsubsetneqq;'=>"\u{2acb}\u{fe00}",'varsupsetneq;'=>"\u{228b}\u{fe00}",'varsupsetneqq;'=>"\u{2acc}\u{fe00}",'vartheta;'=>"\u{3d1}",'vartriangleleft;'=>"\u{22b2}",'vartriangleright;'=>"\u{22b3}",'vcy;'=>"\u{432}",'vdash;'=>"\u{22a2}",'vee;'=>"\u{2228}",'veebar;'=>"\u{22bb}",'veeeq;'=>"\u{225a}",'vellip;'=>"\u{22ee}",'verbar;'=>"\u{7c}",'vert;'=>"\u{7c}",'vfr;'=>"\u{1d533}",'vltri;'=>"\u{22b2}",'vnsub;'=>"\u{2282}\u{20d2}",'vnsup;'=>"\u{2283}\u{20d2}",'vopf;'=>"\u{1d567}",'vprop;'=>"\u{221d}",'vrtri;'=>"\u{22b3}",'vscr;'=>"\u{1d4cb}",'vsubnE;'=>"\u{2acb}\u{fe00}",'vsubne;'=>"\u{228a}\u{fe00}",'vsupnE;'=>"\u{2acc}\u{fe00}",'vsupne;'=>"\u{228b}\u{fe00}",'vzigzag;'=>"\u{299a}",'wcirc;'=>"\u{175}",'wedbar;'=>"\u{2a5f}",'wedge;'=>"\u{2227}",'wedgeq;'=>"\u{2259}",'weierp;'=>"\u{2118}",'wfr;'=>"\u{1d534}",'wopf;'=>"\u{1d568}",'wp;'=>"\u{2118}",'wr;'=>"\u{2240}",'wreath;'=>"\u{2240}",'wscr;'=>"\u{1d4cc}",'xcap;'=>"\u{22c2}",'xcirc;'=>"\u{25ef}",'xcup;'=>"\u{22c3}",'xdtri;'=>"\u{25bd}",'xfr;'=>"\u{1d535}",'xhArr;'=>"\u{27fa}",'xharr;'=>"\u{27f7}",'xi;'=>"\u{3be}",'xlArr;'=>"\u{27f8}",'xlarr;'=>"\u{27f5}",'xmap;'=>"\u{27fc}",'xnis;'=>"\u{22fb}",'xodot;'=>"\u{2a00}",'xopf;'=>"\u{1d569}",'xoplus;'=>"\u{2a01}",'xotime;'=>"\u{2a02}",'xrArr;'=>"\u{27f9}",'xrarr;'=>"\u{27f6}",'xscr;'=>"\u{1d4cd}",'xsqcup;'=>"\u{2a06}",'xuplus;'=>"\u{2a04}",'xutri;'=>"\u{25b3}",'xvee;'=>"\u{22c1}",'xwedge;'=>"\u{22c0}",'yacute'=>"\u{fd}",'yacute;'=>"\u{fd}",'yacy;'=>"\u{44f}",'ycirc;'=>"\u{177}",'ycy;'=>"\u{44b}",'yen'=>"\u{a5}",'yen;'=>"\u{a5}",'yf
r;'=>"\u{1d536}",'yicy;'=>"\u{457}",'yopf;'=>"\u{1d56a}",'yscr;'=>"\u{1d4ce}",'yucy;'=>"\u{44e}",'yuml'=>"\u{ff}",'yuml;'=>"\u{ff}",'zacute;'=>"\u{17a}",'zcaron;'=>"\u{17e}",'zcy;'=>"\u{437}",'zdot;'=>"\u{17c}",'zeetrf;'=>"\u{2128}",'zeta;'=>"\u{3b6}",'zfr;'=>"\u{1d537}",'zhcy;'=>"\u{436}",'zigrarr;'=>"\u{21dd}",'zopf;'=>"\u{1d56b}",'zscr;'=>"\u{1d4cf}",'zwj;'=>"\u{200d}",'zwnj;'=>"\u{200c}" - ]; - const C1_TABLE = [ - 128=>8364,130=>8218,131=>402,132=>8222,133=>8230,134=>8224,135=>8225,136=>710,137=>8240,138=>352,139=>8249,140=>338,142=>381,145=>8216,146=>8217,147=>8220,148=>8221,149=>8226,150=>8211,151=>8212,152=>732,153=>8482,154=>353,155=>8250,156=>339,158=>382,159=>376 - ]; -} diff --git a/lib/Charset.php b/lib/Charset.php deleted file mode 100644 index 250c573..0000000 --- a/lib/Charset.php +++ /dev/null @@ -1,432 +0,0 @@ -params['charset'])) { - $encoding = Encoding::matchLabel($type->params['charset']); - if ($encoding) { - return $encoding['name']; - } - } - return null; - } - - /** Inspects the head of an HTML string to guess its encoding - * - * @param string $data The HTML string to scan - * @param int $endAfter The number of bytes of the string to stop after - */ - public static function fromPrescan(string $data, int $endAfter = 1024): ?string { - # When an algorithm requires a user agent to prescan a byte stream to - # determine its encoding, given some defined end condition, then it - # must run the following steps. - # These steps either abort unsuccessfully or return a character - # encoding. If at any point during these steps (including during - # instances of the get an attribute algorithm invoked by this one) - # the user agent either runs out of bytes (meaning the position - # pointer created in the first step below goes beyond the end of the - # byte stream obtained so far) or reaches its end condition, then - # abort the prescan a byte stream to determine its encoding - # algorithm unsuccessfully. 
- $s = substr($data, 0, $endAfter); - $endAfter = strlen($s); - - # Let position be a pointer to a byte in the input byte stream, - # initially pointing at the first byte. - $pos = 0; - - # Loop: If position points to: - while ($pos < $endAfter) { - // OPTIMIZATION: Start my skipping anything not a less-than sign - if (@$s[$pos] === "<") { - $pos++; - - # A sequence of bytes starting with: 0x3C 0x21 0x2D 0x2D (`' sequence) and - # comes after the 0x3C byte that was found.e (The two - # 0x2D bytes can be the same as those in the '", $pos) ?: $endAfter) + 3; - } - # A sequence of bytes starting with: 0x3C, 0x4D or 0x6D, - # 0x45 or 0x65, 0x54 or 0x74, 0x41 or 0x61, and one of - # 0x09, 0x0A, 0x0C, 0x0D, 0x20, 0x2F (case-insensitive - # ASCII 'i", substr($s, $pos, 5))) { - # Advance the position pointer so that it points at - # the next 0x09, 0x0A, 0x0C, 0x0D, 0x20, or 0x2F - # byte (the one in sequence of characters matched above). - $pos += 5; - # Let attribute list be an empty list of strings. - # Let got pragma be false. - # Let need pragma be null. - # Let charset be the null value (which, for the purposes - # of this algorithm, is distinct from an unrecognized - # encoding or the empty string). - $attrList = []; - $gotPragma = false; - $needPragma = null; - $charset = null; - - # Attributes: Get an attribute and its value. - # If no attribute was sniffed, then jump to the processing step below. - while ($attr = self::getAttribute($s, $pos)) { - # If the attribute's name is already in attribute list, - # then return to the step labeled attributes. - if (isset($attrList[$attr['name']])) { - continue; - } - # Add the attribute's name to attribute list. - $attrList[$attr['name']] = true; - # Run the appropriate step from the following list, if one applies: - - # If the attribute's name is "http-equiv" - if ($attr['name'] === "http-equiv") { - # If the attribute's value is "content-type", then set got pragma to true. 
- if ($attr['value'] === "content-type") { - $gotPragma = true; - } - } - # If the attribute's name is "content" - elseif ($attr['name'] === "content") { - # Apply the algorithm for extracting a character encoding from a meta - # element, giving the attribute's value as the string to parse. - # If a character encoding is returned, and if charset is still set to - # null, let charset be the encoding returned, and set need pragma to true. - - // OPTIMIZATION: Check if charset is null before performing the algorithm - if ($charset === null && $candidate = self::fromMeta($attr['value'])) { - $charset = $candidate; - $needPragma = true; - } - } - # If the attribute's name is "charset" - elseif ($attr['name'] === "charset") { - # Let charset be the result of getting an encoding from the attribute's - # value, and set need pragma to false. - $candidate = self::fromCharset($attr['value']); - $charset = $candidate ?? false; // false signifies 'failure' - $needPragma = false; - } - } - - # Processing: If need pragma is null, then jump to the step below labeled next byte. - # If need pragma is true but got pragma is false, then jump to the step below labeled next byte. - if ($needPragma === null || ($needPragma && !$gotPragma)) { - continue; - } - # If charset is failure, then jump to the step below labeled next byte. - if ($charset === false) { - $pos++; - continue; - } - # If charset is a UTF-16 encoding, then set charset to UTF-8. - elseif ($charset === "UTF-16" || $charset === "UTF-16LE" || $charset === "UTF-16BE") { - $charset = "UTF-8"; - } - # If charset is x-user-defined, then set charset to windows-1252. - elseif ($charset === "x-user-defined") { - $charset = "windows-1252"; - } - # Abort the prescan a byte stream to determine its encoding algorithm, - # returning the encoding given by charset. 
- return $charset; - } - # A sequence of bytes starting with a 0x3C byte (<), optionally a 0x2F byte (/), - # and finally a byte in the range 0x41-0x5A or 0x61-0x7A (A-Z or a-z) - elseif ((@$s[$pos] === "/" && ctype_alpha(@$s[$pos + 1])) || (ctype_alpha(@$s[$pos]))) { - # Advance the position pointer so that it points at the next - # 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>) byte. - while (!in_array(@$s[++$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", ">", ""])); - # Repeatedly get an attribute until no further attributes can be found, - # then jump to the step below labeled next byte. - while(self::getAttribute($s, $pos)); - } - # A sequence of bytes starting with: 0x3C 0x21 (`) that comes after the 0x3C byte that was found. - $pos = (strpos($s, ">", $pos) ?: $endAfter) + 1; - } - } - # Any other byte - else { - # Do nothing with that byte. - $pos++; - } - } - return null; - } - - /** Scans an attribute during the encoding detection pre-scan */ - protected static function getAttribute(string $s, &$pos): array { - # When the prescan a byte stream to determine its encoding - # algorithm says to get an attribute, it means doing this: - - # If the byte at position is one of - # 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), - # or 0x2F (/) then advance position to the next byte and - # redo this step. - while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) { - $pos++; - } - $char = @$s[$pos]; - - # If the byte at position is 0x3E (>), - # then abort the get an attribute algorithm. There isn't one. - if ($char === ">") { - return []; - } - # Otherwise, the byte at position is the start of the attribute name. - # Let attribute name and attribute value be the empty string. 
- $name = ""; - $value = ""; - - # Process the byte at position as follows: - while ($char !== "") { - # If it is 0x3D (=), and the attribute name is longer than the empty string - if ($char === "=" && $name !== "") { - # Advance position to the next byte and jump to the step below labeled value. - $pos++; - goto value; - } - # If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), or 0x20 (SP) - elseif (in_array($char, ["\x09", "\x0A", "\x0C", "\x0D", " "])) { - goto spaces; - } - # If it is 0x2F (/) or 0x3E (>) - elseif ($char === "/" || $char === ">") { - # Abort the get an attribute algorithm. - # The attribute's name is the value of attribute name, its value is the empty string. - return ['name' => $name, 'value' => $value]; - } - # If it is in the range 0x41 (A) to 0x5A (Z) - # Anything else - else { - # Append the code point with the same value as the byte at position to attribute name. - # (It doesn't actually matter how bytes outside the ASCII range are handled here, - # since only ASCII bytes can contribute to the detection of a character encoding.) - - // OPTIMIZATION: Also handle uppercase characters - $name .= strtolower($char); - } - - # Advance position to the next byte and return to the previous step. - $char = @$s[++$pos]; - } - - if ($char === "") { - // Out of bytes - return []; - } - - spaces: - # If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), - # or 0x20 (SP) then advance position to the next byte, then, repeat this step. - while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) { - $pos++; - } - $char = @$s[$pos]; - if ($char === "") { - // Out of bytes - return []; - } - # If the byte at position is not 0x3D (=), abort the get an attribute algorithm. - # The attribute's name is the value of attribute name, its value is the empty string. - if ($char !== "=") { - return ['name' => $name, 'value' => $value]; - } - # Advance position past the 0x3D (=) byte. 
- $char = @$s[++$pos]; - - value: - # If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), - # or 0x20 (SP) then advance position to the next byte, then, repeat this step. - while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) { - $pos++; - } - $char = @$s[$pos]; - if ($char === "") { - // Out of bytes - return []; - } - # Process the byte at position as follows: - # If it is 0x22 (") or 0x27 (') - if ($char === "'" || $char === '"') { - # Let b be the value of the byte at position. - $b = $char; - # Quote loop: Advance position to the next byte. - while (($char = @$s[++$pos]) !== "") { - # If the value of the byte at position is the value of b, - # then advance position to the next byte and abort - # the "get an attribute" algorithm. - # The attribute's name is the value of attribute name, - # and its value is the value of attribute value. - if ($char === $b) { - $pos++; - return ['name' => $name, 'value' => $value]; - } - # Otherwise, append a code point to attribute value whose - # value is the same as the value of the byte at position. - - // OPTIMIZATION: Also handle uppercase characters - $value .= strtolower($char); - } - // Out of bytes - return []; - } - # If it is 0x3E (>) - elseif ($char === ">") { - # Abort the get an attribute algorithm. - # The attribute's name is the value of attribute name, - # its value is the empty string. - return ['name' => $name, 'value' => $value]; - } - # Anything else - else { - # Append a code point with the same value as the byte at position to attribute value. - # Advance position to the next byte. - - // OPTIMIZATION: Also handle uppercase characters - $value .= strtolower($char); - - while (($char = @$s[++$pos]) !== "") { - # Process the byte at position as follows: - # If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>) - if (in_array($char, ["\x09", "\x0A", "\x0C", "\x0D", " ", ">"])) { - # Abort the get an attribute algorithm. 
- # The attribute's name is the value of attribute name - # and its value is the value of attribute value. - return ['name' => $name, 'value' => $value]; - } - # If it is in the range 0x41 (A) to 0x5A (Z) - # Anything else - else { - # Append a code point with the same value as - # the byte at position to attribute value. - $value .= strtolower($char); - } - } - // Out of bytes - return []; - } - } - - /** Interprets a quasi-Content-Type value during the encoding detection pre-scan */ - protected static function fromMeta(string $s): ?string { - # The algorithm for extracting a character encoding from a meta element, - # given a string s, is as follows. - # It either returns a character encoding or nothing. - - # Let position be a pointer into s, initially pointing at the start of the string. - $pos = 0; - $end = strlen($s); - - # Loop: - while ($pos < $end) { - # Find the first seven characters in s after position - # that are an ASCII case-insensitive match for the word "charset". - # If no such match is found, return nothing. - $found = stripos($s, "charset", $pos); - if ($found === false) { - return null; - } - $pos = $found + 7; - # Skip any ASCII whitespace that immediately follow the word "charset" - # (there might not be any). - while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) { - $pos++; - } - # If the next character is not a U+003D EQUALS SIGN (=), - # then move position to point just before that next - # character, and jump back to the step labeled loop. - if (@$s[$pos] !== "=") { - continue; - } - # Skip any ASCII whitespace that immediately follow the equals sign - # (there might not be any). - while (in_array(@$s[++$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])); - - # Process the next character as follows: - $char = @$s[$pos]; - - # If it is a U+0022 QUOTATION MARK character (")... - # If it is a U+0027 APOSTROPHE character (')... - if ($char === '"' || $char === "'") { - # ... 
and there is a later U+0022 QUOTATION MARK character (") in s - # ... and there is a later U+0027 APOSTROPHE character (') in s - if (($end = strpos($s, $char, $pos + 1)) !== false) { - $pos++; - return self::fromCharset(substr($s, $pos, $end - $pos)); - } - # If it is an unmatched U+0022 QUOTATION MARK character (") - # If it is an unmatched U+0027 APOSTROPHE character (') - else { - # Return nothing - return null; - } - } - # There is no next character - elseif ($char === "") { - # Return nothing - return null; - } - # Anything else - else { - # Return the result of getting an encoding from the substring - # that consists of this character up to but not including - # the first ASCII whitespace or U+003B SEMICOLON (;) - # character, or the end of s, whichever comes first. - $size = -1; - while (!in_array(@$s[$pos + (++$size)], ["\x09", "\x0A", "\x0C", "\x0D", " ", ";", ""])); - return self::fromCharset(substr($s, $pos, $size)); - } - } - } // @codeCoverageIgnore -} diff --git a/lib/DOM/Comment.php b/lib/Comment.php similarity index 88% rename from lib/DOM/Comment.php rename to lib/Comment.php index 9e2d313..3077eaa 100644 --- a/lib/DOM/Comment.php +++ b/lib/Comment.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class Comment extends \DOMComment { use LeafNode, Moonwalk, ToString; diff --git a/lib/DOM/DOMException.php b/lib/DOMException.php similarity index 98% rename from lib/DOM/DOMException.php rename to lib/DOMException.php index b6ba2c5..7d3052a 100644 --- a/lib/DOM/DOMException.php +++ b/lib/DOMException.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class DOMException extends \Exception { // From PHP's DOMException; keeping error codes consistent diff --git a/lib/Data.php b/lib/Data.php deleted file mode 100644 index b6ed3f1..0000000 --- a/lib/Data.php +++ 
/dev/null @@ -1,289 +0,0 @@ -errorHandler = $errorHandler ?? new ParseError; - $this->filePath = $filePath; - $encodingOrContentType = (string) $encodingOrContentType; - // don't track the current line/column position if erroro reporting has been suppressed - $this->track = (bool) (error_reporting() & \E_USER_WARNING); - - # 13.2.3.2 Determining the character encoding - # User agents must use the following algorithm, called the encoding - # sniffing algorithm, to determine the character encoding to use - # when decoding a document in the first pass. This algorithm takes - # as input any out-of-band metadata available to the user agent - # (e.g. the Content-Type metadata of the document) and all the bytes - # available so far, and returns a character encoding and a confidence - # that is either tentative or certain. - // NOTE: We implement steps 1, 2, 4, 5, and 9 - if ($encoding = Charset::fromBOM($data)) { - # If the result of BOM sniffing is an encoding, return that - # encoding with confidence certain. - $this->encodingCertain = true; - } elseif ($encoding = Charset::fromCharset($encodingOrContentType)) { - # If the user has explicitly instructed the user agent to override - # the document's character encoding with a specific encoding, - # optionally return that encoding with the confidence certain. - $this->encodingCertain = true; - } elseif ($encoding = Charset::fromTransport($encodingOrContentType)) { - # If the transport layer specifies a character encoding, and it is - # supported, return that encoding with the confidence certain. - $this->encodingCertain = true; - } elseif ($encoding = Charset::fromPrescan($data)) { - # Optionally prescan the byte stream to determine its encoding. - # The aforementioned algorithm either aborts unsuccessfully or - # returns a character encoding. If it returns a character - # encoding, then return the same encoding, with confidence - # tentative. 
- $this->encodingCertain = false; - } else { - # Otherwise, return an implementation-defined or user-specified - # default character encoding, with the confidence tentative. - $encoding = Charset::fromCharset(Parser::$fallbackEncoding) ?? "windows-1252"; - $this->encodingCertain = false; - } - $this->encoding = $encoding; - $this->data = Encoding::createDecoder($encoding, $data, false, true); - } - - public function consume(): string { - $char = $this->data->nextChar(); - # Before the tokenization stage, the input stream must be - # preprocessed by normalizing newlines. - # Thus, newlines in HTML DOMs are represented by U+000A LF characters, - # and there are never any U+000D CR characters in the input to the tokenization stage. - if ($char === "\r") { - // if this is a CR+LF pair, skip the CR and note the normalization - if ($this->data->peekChar() === "\n") { - $char = $this->data->nextChar(); - $this->normalized[$this->data->posChar()] = true; - } - // otherwise just silently change the character to LF; - // the bare CR will be trivial to process when seeking backwards - else { - $char = "\n"; - } - } - // unless we're peeking, track line and column position, and whether we've hit EOF - if ($this->track) { - if ($char === "\n") { - $this->newlines[$this->data->posChar()] = $this->_column; - $this->_column = 0; - $this->_line++; - } elseif ($char === '') { - $this->eof = true; - } else { - $this->_column++; - $len = strlen($char); - $here = $this->data->posChar(); - if ($this->lastError < $here) { - // look for erroneous characters - if ($len === 1) { - $ord = ord($char); - if (($ord < 0x20 && !in_array($ord, [0x0, 0x9, 0xA, 0xC])) || $ord === 0x7F) { - $this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM); - $this->lastError = $here; - } - } elseif ($len === 2) { - if (ord($char[0]) == 0xC2) { - $ord = ord($char[1]); - if ($ord >= 0x80 && $ord <= 0x9F) { - $this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM); - $this->lastError = $here; - } - } - } 
elseif ($len === 3) { - $head = ord($char[0]); - if ($head === 0xED) { - $tail = (ord($char[1]) << 8) + ord($char[2]); - if ($tail >= 0xA080 && $tail <= 0xBFBF) { - $this->error(ParseError::SURROGATE_IN_INPUT_STREAM); - $this->lastError = $here; - } - } elseif ($head === 0xEF) { - $tail = (ord($char[1]) << 8) + ord($char[2]); - if (($tail >= 0xB790 && $tail <= 0xB7AF) || $tail >= 0xBFBE) { - $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM); - $this->lastError = $here; - } elseif ($tail === 0xBFBD && $this->data->posErr === $here) { - $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM, $this->data->posByte); - $this->lastError = $here; - } - } - } elseif ($len === 4) { - $tail = (ord($char[2]) << 8) + ord($char[3]); - if ($tail >= 0xBFBE) { - $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM); - $this->lastError = $here; - } - $this->astrals[$here] = true; - } - } - } - } - return $char; - } - - public function unconsume(int $length = 1, bool $retreatPointer = true): void { - assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length)); - - if ($this->eof) { - $length--; - $this->eof = false; - } - while ($length-- > 0) { - $here = $this->data->posChar(); - // if the previous character was a normalized CR+LF pair, we need to go back two - if (isset($this->normalized[$here])) { - $this->data->seek(-1); - } - // recalculate line and column positions, if requested - if ($retreatPointer && $this->track) { - $col = $this->newlines[$here] ?? 0; - if ($col) { - $this->_column = $col; - $this->_line--; - } else { - $this->_column--; - if ($this->astrals[$here] ?? 
false) { - $this->_column--; - } - } - } - $this->data->seek(-1); - } - } - - public function consumeWhile(string $match, int $limit = null): string { - $start = $this->data->posChar(); - $out = $this->data->asciiSpan($match, $limit); - if ($this->track) { - $this->_column += ($this->data->posChar() - $start); - } - return $out; - } - - public function consumeUntil(string $match, int $limit = null): string { - $start = $this->data->posChar(); - if ($this->track) { - // control characters produce parse errors - $match .= "\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F"; - $out = $this->data->asciiSpanNot($match."\r\n", $limit); - $this->_column += ($this->data->posChar() - $start); - return $out; - } else { - return $this->data->asciiSpanNot($match."\r\n", $limit); - } - } - - public function peek(int $length = 1): string { - assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length)); - return $this->data->peekChar($length); - } - - /** Returns an indexed array with the line and column positions of the requested offset from the current position */ - public function whereIs(int $relativePos): array { - if ($this->track) { - if ($this->eof) { - $relativePos++; - if ($this->astrals[$this->data->posChar()] ?? 
false) { - $relativePos++; - } - } - if ($relativePos === 0) { - if (!$this->_column && $this->_line > 1) { - return [$this->_line - 1, $this->newlines[$this->data->posChar()] + 1]; - } else { - return [$this->_line, $this->_column]; - } - } elseif ($relativePos < 0) { - $pos = $this->data->posChar(); - $line = $this->_line; - $col = $this->_column; - do { - // If the current position is the start of a line, - // get the column position of the end of the previous line - if (isset($this->newlines[$pos])) { - $line--; - $col = $this->newlines[$pos]; - // If the newline was a normalized CR+LF pair, - // go back one extra character - if (isset($this->normalized[$pos])) { - $pos--; - } - } else { - $col--; - // supplementary plane characters count as two - if ($this->astrals[$pos] ?? false) { - $this->_column--; - } - } - $pos--; - } while (++$relativePos < 0); - return [$line, $col]; - } else { - return [$this->_line, $this->_column + $relativePos]; - } - } else { - return [0, 0]; - } - } - - public function __get($property) { - switch ($property) { - case 'column': return $this->_column; - break; - case 'line': return $this->_line; - break; - case 'pointer': return $this->data->posChar(); - break; - default: return null; - } - } -} diff --git a/lib/DOM/Document.php b/lib/Document.php similarity index 100% rename from lib/DOM/Document.php rename to lib/Document.php diff --git a/lib/DOM/DocumentFragment.php b/lib/DocumentFragment.php similarity index 90% rename from lib/DOM/DocumentFragment.php rename to lib/DocumentFragment.php index 5a667e8..c7e94fa 100644 --- a/lib/DOM/DocumentFragment.php +++ b/lib/DocumentFragment.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class DocumentFragment extends \DOMDocumentFragment { use ContainerNode, MoonwalkShallow, ParentNode, ToString, Walk, WalkShallow; diff --git a/lib/DOM/Element.php b/lib/Element.php similarity index 99% rename 
from lib/DOM/Element.php rename to lib/Element.php index 19b3ac8..50969c2 100644 --- a/lib/DOM/Element.php +++ b/lib/Element.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class Element extends \DOMElement { use ContainerNode, DocumentOrElement, EscapeString, MagicProperties, Moonwalk, MoonwalkShallow, ParentNode, ToString, Walk, WalkShallow; @@ -13,7 +13,7 @@ class Element extends \DOMElement { public function __get_classList(): ?TokenList { - // MensBeam\HTML\TokenList uses WeakReference to prevent a circular reference, + // MensBeam\HTML\DOM\TokenList uses WeakReference to prevent a circular reference, // so it requires PHP 7.4 to work. if (version_compare(\PHP_VERSION, '7.4.0', '>=')) { // Only create the class list if it is actually used. diff --git a/lib/DOM/ElementMap.php b/lib/ElementMap.php similarity index 98% rename from lib/DOM/ElementMap.php rename to lib/ElementMap.php index 17b3a21..d637eec 100644 --- a/lib/DOM/ElementMap.php +++ b/lib/ElementMap.php @@ -5,7 +5,7 @@ */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // This is a write-only map of elements which need to be kept in memory; it // exists because values of properties on derived DOM classes are lost unless at diff --git a/lib/Exception.php b/lib/Exception.php deleted file mode 100644 index 1a73c49..0000000 --- a/lib/Exception.php +++ /dev/null @@ -1,109 +0,0 @@ - 'Invalid error code', - 101 => 'Unknown error; escaping', - 102 => 'Incorrect number of parameters for Exception message; %s expected', - 103 => 'Unreachable code', - - 201 => 'Non-empty Document supplied as argument for Parser', - 202 => 'Fragment\'s quirks mode must be one of Parser::NO_QUIRKS_MODE, Parser::LIMITED_QUIRKS_MODE, or Parser::QUIRKS_MODE', - - 301 => 'Invalid Stack index at %s', - 302 => 'Element, Document, or DOMDocumentFragment expected for fragment context', - 303 => 'Element, 
string, or array expected', - 304 => 'String or array expected', - 305 => 'Stack is incorrectly empty', - 306 => 'Stack is in an invalid state; dump: %s', - 307 => 'No %s context exists in stack', - 308 => 'Stack value is invalid', - 309 => 'Invalid stack offset; offset must be %s', - 310 => 'Root element cannot be deleted from the stack', - - 401 => 'Data string expected; found %s', - 402 => '%s is an invalid data consumption length; a value of 1 or above is expected', - - 501 => 'The Tokenizer has entered an invalid state: %s', - 502 => 'Invalid character reference consumption state: %s', - - 601 => 'Form element expected, found %s', - 602 => 'Element, Document, or DOMDocumentFragment expected; found %s', - 603 => 'Unexpected end of file', - 604 => 'Target document is not empty', - 605 => 'Invalid token class: %s', - 606 => 'Invalid insertion location' - ]; - - public function __construct(int $code, ...$args) { - if (!isset(self::$messages[$code])) { - throw new self(self::INVALID_CODE); - } - - $message = self::$messages[$code]; - $previous = null; - - if ($args) { - // Grab a previous exception if there is one. - if ($args[0] instanceof \Throwable) { - $previous = array_shift($args); - } elseif (end($args) instanceof \Throwable) { - $previous = array_pop($args); - } - } - - // Count the number of replacements needed in the message. - preg_match_all('/(\%(?:\d+\$)?s)/', $message, $matches); - $count = count(array_unique($matches[1])); - - // If the number of replacements don't match the arguments then oops. - if (count($args) !== $count) { - throw new self(self::INCORRECT_PARAMETERS_FOR_MESSAGE, $count); - } - - if ($count > 0) { - // Go through each of the arguments and run sprintf on the strings. 
- $message = call_user_func_array('sprintf', array_merge([$message], $args)); - } - - parent::__construct($message, $code, $previous); - } -} diff --git a/lib/LoopException.php b/lib/LoopException.php deleted file mode 100644 index 4909e41..0000000 --- a/lib/LoopException.php +++ /dev/null @@ -1,10 +0,0 @@ -nextCode(); - $esc = "U".str_pad(strtoupper(dechex($o)), 6, "0", \STR_PAD_LEFT); - $name = str_replace($c, $esc, $name); - } - // Apply stricter rules to the first character - if (preg_match('/^[^A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $name, $m)) { - $c = $m[0]; - $o = (new UTF8($c))->nextCode(); - $esc = "U".str_pad(strtoupper(dechex($o)), 6, "0", \STR_PAD_LEFT); - $name = $esc.substr($name, strlen($c)); - } - return $name; - } - - protected function uncoerceName(string $name): string { - preg_match_all('/U[0-9A-F]{6}/', $name, $m); - foreach (array_unique($m[0], \SORT_STRING) as $o) { - $c = UTF8::encode(hexdec(substr($o, 1))); - $name = str_replace($o, $c, $name); - } - return $name; - } - - protected function escapeString(string $string, bool $attribute = false): string { - # Escaping a string (for the purposes of the algorithm above) consists of - # running the following steps: - - # 1. Replace any occurrence of the "&" character by the string "&". - # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the - # string " ". - $string = str_replace(['&', "\u{A0}"], ['&', ' '], $string); - # 3. If the algorithm was invoked in the attribute mode, replace any - # occurrences of the """ character by the string """. - # 4. If the algorithm was not invoked in the attribute mode, replace any - # occurrences of the "<" character by the string "<", and any - # occurrences of the ">" character by the string ">". - return ($attribute) ? 
str_replace('"', '"', $string) : str_replace(['<', '>'], ['<', '>'], $string); - } -} diff --git a/lib/NotImplementedException.php b/lib/NotImplementedException.php deleted file mode 100644 index 5c03f2e..0000000 --- a/lib/NotImplementedException.php +++ /dev/null @@ -1,10 +0,0 @@ - true, - 'dt' => true, - 'li' => true, - 'optgroup' => true, - 'option' => true, - 'p' => true, - 'rb' => true, - 'rp' => true, - 'rt' => true, - 'rtc' => true, - ]; - protected const IMPLIED_END_TAGS_THOROUGH = [ - 'caption' => true, - 'colgroup' => true, - 'dd' => true, - 'dt' => true, - 'li' => true, - 'optgroup' => true, - 'option' => true, - 'p' => true, - 'rb' => true, - 'rp' => true, - 'rt' => true, - 'rtc' => true, - 'tbody' => true, - 'td' => true, - 'tfoot' => true, - 'th' => true, - 'thead' => true, - 'tr' => true, - ]; - protected const GENERAL_SCOPE = [ - Parser::HTML_NAMESPACE => [ - 'applet', - 'caption', - 'html', - 'table', - 'td', - 'th', - 'marquee', - 'object', - 'template' - ], - Parser::MATHML_NAMESPACE => [ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml' - ], - Parser::SVG_NAMESPACE => [ - 'foreignObject', - 'desc', - 'title' - ], - ]; - protected const LIST_ITEM_SCOPE = [ - // everything in general scope, and these in the HTML namespace - 'ol', - 'ul', - ]; - protected const BUTTON_SCOPE = [ - // everything in general scope, and these in the HTML namespace - 'button', - ]; - protected const TABLE_SCOPE = [ - Parser::HTML_NAMESPACE => [ - 'html', - 'table', - 'template', - ], - ]; - protected const SELECT_SCOPE = [ - // all elements EXCEPT these - Parser::HTML_NAMESPACE => [ - 'optgroup', - 'option', - ], - ]; - - /** @var ?\DOMElement */ - protected $fragmentContext = null; - /** @var ?\DOMElement */ - public $currentNode = null; - /** @var ?string */ - public $currentNodeName = null; - /** @var ?string */ - public $currentNodeNamespace = null; - /** @var ?\DOMElement */ - public $adjustedCurrentNode = null; - /** @var ?string */ - public 
$adjustedCurrentNodeName = null; - /** @var ?string */ - public $adjustedCurrentNodeNamespace = null; - - public function __construct(?\DOMElement $fragmentContext = null) { - $this->fragmentContext = $fragmentContext; - } - - public function pop() { - $out = array_pop($this->_storage); - $this->computeProperties(); - return $out; - } - - public function offsetSet($offset, $value) { - assert($offset >= 0, new Exception(Exception::STACK_INVALID_INDEX, $offset)); - - if ($offset === null) { - $this->_storage[] = $value; - } else { - $this->_storage[$offset] = $value; - } - $this->computeProperties(); - } - - public function offsetUnset($offset) { - assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset)); - array_splice($this->_storage, $offset, 1, []); - $this->computeProperties(); - } - - public function insert(\DOMElement $element, ?int $at = null): void { - assert($at === null || ($at >= 0 && $at <= count($this->_storage)), new Exception(Exception::STACK_INVALID_INDEX, $at)); - if ($at === null) { - $this[] = $element; // @codeCoverageIgnore - } else { - array_splice($this->_storage, $at, 0, [$element]); - } - $this->computeProperties(); - } - - public function popUntil(string ...$target): void { - do { - $node = array_pop($this->_storage); - assert(isset($node), new Exception(Exception::STACK_INCORRECTLY_EMPTY)); - } while ($node->namespaceURI !== null || !in_array($node->nodeName, $target)); - $this->computeProperties(); - } - - public function popUntilSame(\DOMElement $target): void { - do { - $node = array_pop($this->_storage); - } while (!$node->isSameNode($target)); - $this->computeProperties(); - } - - public function find(string ...$name): int { - foreach ($this as $k => $node) { - if ($node->namespaceURI === null && in_array($node->nodeName, $name)) { - return $k; - } - } - return -1; - } - - public function findNot(string ...$name): int { - foreach ($this as $k => $node) { - if ($node->namespaceURI 
!== null || !in_array($node->nodeName, $name)) { - return $k; - } - } - return -1; - } - - public function findSame(\DOMElement $target): int { - for ($k = (sizeof($this->_storage) - 1); $k > -1; $k--) { - if ($this->_storage[$k]->isSameNode($target)) { - return $k; - } - } - return -1; - } - - public function removeSame(\DOMElement $target): void { - $pos = $this->findSame($target); - if ($pos > -1) { - unset($this[$pos]); - } - } - - public function generateImpliedEndTags(string ...$exclude): void { - # When the steps below require the UA to generate implied end tags, - # then, while the current node is {elided list of element names}, - # the UA must pop the current node off the stack of open elements. - # - # If a step requires the UA to generate implied end tags but lists - # an element to exclude from the process, then the UA must perform - # the above steps as if that element was not in the above list. - $map = self::IMPLIED_END_TAGS; - foreach($exclude as $name) { - $map[$name] = false; - } - while (!$this->isEmpty() && $this->top()->namespaceURI === null && ($map[$this->top()->nodeName] ?? false)) { - array_pop($this->_storage); - $this->count--; - } - $this->computeProperties(); - } - - public function generateImpliedEndTagsThoroughly(): void { - # When the steps below require the UA to generate all implied end tags - # thoroughly, then, while the current node is {elided list of element names}, - # the UA must pop the current node off the stack of open elements. - while (!$this->isEmpty() && $this->top()->namespaceURI === null && (self::IMPLIED_END_TAGS_THOROUGH[$this->top()->nodeName] ?? 
false)) { - array_pop($this->_storage); - $this->count--; - } - $this->computeProperties(); - } - - public function clearToTableContext(): void { - # When the algorithm requires the UA to clear the stack back to a - # table context, it means that the UA must, while the current node - # is not a table, template, or html element, pop elements from the - # stack of open elements. - assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY)); - $pos = $this->find("table", "template", "html"); - assert($pos > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table')); - $stop = $pos + 1; - while (count($this->_storage) > $stop) { - array_pop($this->_storage); - } - $this->computeProperties(); - } - - public function clearToTableBodyContext(): void { - # When the steps above require the UA to clear the stack back to a - # table body context, it means that the UA must, while the current - # node is not a tbody, tfoot, thead, template, or html element, - # pop elements from the stack of open elements. - assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY)); - $pos = $this->find("tbody", "tfoot", "thead", "template", "html"); - assert($pos > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table body')); - $stop = $pos + 1; - while (count($this->_storage) > $stop) { - array_pop($this->_storage); - } - $this->computeProperties(); - } - - public function clearToTableRowContext(): void { - # When the steps above require the UA to clear the stack back to a - # table row context, it means that the UA must, while the current - # node is not a tr, template, or html element, pop elements from - # the stack of open elements. 
- assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY)); - $pos = $this->find("tr", "template", "html"); - assert($pos > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table row')); - $stop = $pos + 1; - while (count($this->_storage) > $stop) { - array_pop($this->_storage); - } - $this->computeProperties(); - } - - public function hasElementInScope(...$target): bool { - # The stack of open elements is said to have a particular element in scope when - # it has that element in the specific scope consisting of the following element - # types: - # - # {elided} - return $this->hasElementInScopeHandler($target, self::GENERAL_SCOPE); - } - - public function hasElementInListItemScope(...$target): bool { - $scope = self::GENERAL_SCOPE; - $scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::LIST_ITEM_SCOPE); - return $this->hasElementInScopeHandler($target, $scope); - } - - public function hasElementInButtonScope(...$target): bool { - $scope = self::GENERAL_SCOPE; - $scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::BUTTON_SCOPE); - return $this->hasElementInScopeHandler($target, $scope); - } - - public function hasElementInTableScope(...$target): bool { - return $this->hasElementInScopeHandler($target, self::TABLE_SCOPE); - } - - public function hasElementInSelectScope(...$target): bool { - # The stack of open elements is said to have a particular element - # in select scope when it has that element in the specific scope - # consisting of all element types EXCEPT the following: - # - # optgroup in the HTML namespace - # option in the HTML namespace - return $this->hasElementInScopeHandler($target, self::SELECT_SCOPE, false); - } - - protected function hasElementInScopeHandler(array $targets, array $list, $matchType = true): bool { - # The stack of open elements is said to have an element target node - # in a specific scope consisting of a list of element types list - # when 
the following algorithm terminates in a match state: - # Initialize node to be the current node (the bottommost node of the stack). - foreach ($this as $node) { - # If node is the target node, terminate in a match state. - foreach ($targets as $target) { - if ($target instanceof \DOMElement) { - if ($node->isSameNode($target)) { - return true; - } - } else { - if ($node->namespaceURI === null && $node->nodeName === $target) { - return true; - } - } - } - # Otherwise, if node is one of the element types in list, terminate in a failure state. - $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE; - if (in_array($node->nodeName, $list[$ns] ?? []) === $matchType) { - return false; - } - # Otherwise, set node to the previous entry in the stack of - # open elements and return to step 2. (This will never fail, - # since the loop will always terminate in the previous step - # if the top of the stack — an html element — is reached.) - } - assert(false, new Exception(Exception::STACK_INVALID_STATE, (string)$this)); // @codeCoverageIgnore - } // @codeCoverageIgnore - - protected function computeProperties(): void { - $this->count = count($this->_storage); - $this->currentNode = $this->top(); - # The adjusted current node is the context element if the parser was created by - # the HTML fragment parsing algorithm and the stack of open elements has only one - # element in it (fragment case); otherwise, the adjusted current node is the - # current node. 
- if ($this->fragmentContext && $this->count === 1) { - $this->adjustedCurrentNode = $this->fragmentContext; - } else { - $this->adjustedCurrentNode = $this->currentNode; - } - if ($this->currentNode) { - $this->currentNodeName = $this->currentNode->nodeName; - $this->currentNodeNamespace = $this->currentNode->namespaceURI; - } else { - $this->currentNodeName = null; // @codeCoverageIgnore - $this->currentNodeNamespace = null; // @codeCoverageIgnore - } - if ($this->adjustedCurrentNode) { - $this->adjustedCurrentNodeName = $this->adjustedCurrentNode->nodeName; - $this->adjustedCurrentNodeNamespace = $this->adjustedCurrentNode->namespaceURI; - } else { - $this->adjustedCurrentNodeName = null; // @codeCoverageIgnore - $this->adjustedCurrentNodeNamespace = null; // @codeCoverageIgnore - } - } - - public function __toString(): string { - $out = []; - foreach ($this as $node) { - $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE; - $prefix = Parser::NAMESPACE_MAP[$ns] ?? "?"; - $prefix .= $prefix ? 
" " : ""; - $out[] = $prefix.$node->nodeName; - } - return implode(" < ", $out); - } -} diff --git a/lib/ParseError.php b/lib/ParseError.php deleted file mode 100644 index c65cac6..0000000 --- a/lib/ParseError.php +++ /dev/null @@ -1,215 +0,0 @@ - 'Expected DOCTYPE but got start tag <%s>', - self::EXPECTED_DOCTYPE_BUT_GOT_END_TAG => 'Expected DOCTYPE but got end tag ', - self::EXPECTED_DOCTYPE_BUT_GOT_CHARS => 'Expected DOCTYPE but got characters', - self::EXPECTED_DOCTYPE_BUT_GOT_EOF => 'Expected DOCTYPE but got end-of-file', - self::UNKNOWN_DOCTYPE => 'Unknown DOCTYPE', - self::UNEXPECTED_START_TAG => 'Unexpected start tag <%s>', - self::UNEXPECTED_END_TAG => 'Unexpected end tag ', - self::NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS => 'Trailing solidus in non-void HTML element start tag <%s>', - self::UNEXPECTED_START_TAG_IMPLIES_END_TAG => 'Unexpcted non-nesting start tag <%s> in nested context', - self::UNEXPECTED_START_TAG_ALIAS => 'Start tag <%s> should be <%s>', - self::UNEXPECTED_CHAR => 'Unexpected character data', - self::UNEXPECTED_EOF => 'Unexpected end of file', - self::UNEXPECTED_PARENT => 'Start tag <%s> not valid in parent <%s>', - self::INVALID_NAMESPACE_ATTRIBUTE_VALUE => 'Invalid value for attribute "%s"; it must have value "%s" or be omitted', - self::FOSTERED_START_TAG => 'Start tag <%s> moved to before table', - self::FOSTERED_END_TAG => 'End tag moved to before table', - self::FOSTERED_CHAR => 'Character moved to before table', - - self::ENCODING_ERROR => 'Corrupt encoding near byte position %s', - self::UNEXPECTED_NULL_CHARACTER => 'Unexpected null character', - self::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME => 'Unexpected "?" 
character instead of tag name', - self::EOF_BEFORE_TAG_NAME => 'End-of-file before tag name', - self::INVALID_FIRST_CHARACTER_OF_TAG_NAME => 'Invalid first character "%s" of tag name', - self::MISSING_END_TAG_NAME => 'Missing end-tag name', - self::EOF_IN_TAG => 'End-of-file in tag', - self::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT => 'End-of-file in script (HTML comment-like) text', - self::UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME => 'Unexpected equals sign before attribute name', - self::DUPLICATE_ATTRIBUTE => 'Duplicate attribute "%s" in start tag', - self::UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME => 'Unexpected character "%s" in attribute name', - self::MISSING_ATTRIBUTE_VALUE => 'Missing attribute value', - self::UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE => 'Unexpected character "%s" in unquoted attribute value', - self::MISSING_WHITESPACE_BETWEEN_ATTRIBUTES => 'Missing whitespace between attributes', - self::UNEXPECTED_SOLIDUS_IN_TAG => 'Unexpected solidus in tag', - self::CDATA_IN_HTML_CONTENT => 'CDATA in HTML content', - self::INCORRECTLY_OPENED_COMMENT => 'Incorrectly opened comment', - self::ABRUPT_CLOSING_OF_EMPTY_COMMENT => 'Abrupt closing of empty comment', - self::EOF_IN_COMMENT => 'End-of-file in comment', - self::NESTED_COMMENT => 'Nested comment', - self::INCORRECTLY_CLOSED_COMMENT => 'Incorrectly closed comment', - self::EOF_IN_DOCTYPE => 'End-of-file in DOCTYPE', - self::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME => 'Missing whitespace before DOCTYPE name', - self::MISSING_DOCTYPE_NAME => 'Missing DOCTYPE name', - self::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME => 'Invalid character sequence after DOCTYPE name', - self::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD => 'Missing whitespace after DOCTYPE "PUBLIC" keyword', - self::MISSING_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing DOCTYPE "PUBLIC" identifier', - self::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing quote before DOCTYPE "PUBLIC" identifier', - 
self::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER => 'Abrupt DOCTYPE "PUBLIC" identifier', - self::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS => 'Missing whitespace between DOCTYPE "PUBLIC" and "SYSTEM" identifiers', - self::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD => 'Missing whitespace after DOCTYPE "SYSTEM" keyword', - self::MISSING_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing DOCTYPE "SYSTEM" identifier', - self::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing quote before DOCTYPE "SYSTEM" identifier', - self::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER => 'Abrupt DOCTYPE "SYSTEM" identifier', - self::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER => 'Unexpected character "%s" after DOCTYPE "SYSTEM" identifier', - self::EOF_IN_CDATA => 'End-of-file in CDATA section', - self::END_TAG_WITH_ATTRIBUTES => 'End-tag with attributes', - self::END_TAG_WITH_TRAILING_SOLIDUS => 'End-tag with trailing solidus', - self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 'Missing semicolon after character reference', - self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 'Unknown named character reference "%s"', - self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 'Absence of digits in character reference', - self::NULL_CHARACTER_REFERENCE => 'Null character reference', - self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 'Character reference outside Unicode range', - self::SURROGATE_CHARACTER_REFERENCE => 'Surrogate character rereference', - self::NONCHARACTER_CHARACTER_REFERENCE => 'Non-character character reference', - self::CONTROL_CHARACTER_REFERENCE => 'Control-character character reference', - self::SURROGATE_IN_INPUT_STREAM => 'Surrogate character in input stream', - self::NONCHARACTER_IN_INPUT_STREAM => 'Non-character character in input stream', - self::CONTROL_CHARACTER_IN_INPUT_STREAM => 'Control character in input stream', - ]; - - const REPORT_OFFSETS = [ - self::INCORRECTLY_OPENED_COMMENT => 1, - self::SURROGATE_CHARACTER_REFERENCE => 1, - 
self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 1, - self::NONCHARACTER_CHARACTER_REFERENCE => 1, - self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 1, - self::NULL_CHARACTER_REFERENCE => 1, - self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 1, - self::CONTROL_CHARACTER_REFERENCE => 1, - self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 1, - ]; - - public function setHandler() { - // Set the errror handler and honor already-set error reporting rules. - set_error_handler([$this, 'errorHandler'], \E_USER_WARNING); - } - - public function clearHandler() { - restore_error_handler(); - } - - protected function prepareMessage(string $file, int $line, int $column, int $code, ...$arg): string { - assert(isset(self::MESSAGES[$code]), new Exception(Exception::INVALID_CODE)); - - $message = self::MESSAGES[$code]; - // Count the number of replacements needed in the message. - $count = substr_count($message, '%s'); - // If the number of replacements don't match the arguments then oops. - assert(count($arg) === $count, new Exception(Exception::INCORRECT_PARAMETERS_FOR_MESSAGE, $count)); - - if ($count > 0) { - // Convert newlines and tabs in the arguments to words to better - // express what they are. - $arg = array_map(function($value) { - if ($value === "\n") { - return 'Newline'; - } elseif ($value === "\t") { - return 'Tab'; - } elseif ($value === null) { - return 'nothing'; - } else { - return $value; - } - }, $arg); - - // Go through each of the arguments and run sprintf on the strings. 
- $message = sprintf($message, ...$arg); - } - // Wrap with preamble and location - // TODO: the file path should be middle-elided when necessary so that - // the message does not exceed 1024 bytes - $message = sprintf("HTML5 Parse Error: \"%s\" in %s", $message, $file); - if ($line) { - $message .= sprintf(" on line %s, column %s", $line, $column); - } - return $message; - } - - public function emit(string $file, int $line, int $column, int $code, ...$arg): bool { - return trigger_error($this->prepareMessage($file, $line, $column, $code, ...$arg), \E_USER_WARNING); - } - - public function errorHandler(int $code, string $message) { - echo "$message\n"; - } -} diff --git a/lib/ParseErrorDummy.php b/lib/ParseErrorDummy.php deleted file mode 100644 index 34f5956..0000000 --- a/lib/ParseErrorDummy.php +++ /dev/null @@ -1,21 +0,0 @@ -data ?? null); - assert($data instanceof Data); - assert($this->errorHandler instanceof ParseError); - list($line, $column) = $data->whereIs(ParseError::REPORT_OFFSETS[$code] ?? 0); - return $this->errorHandler->emit($data->filePath, $line, $column, $code, ...$arg); - } -} diff --git a/lib/Parser.php b/lib/Parser.php deleted file mode 100644 index 1501b39..0000000 --- a/lib/Parser.php +++ /dev/null @@ -1,100 +0,0 @@ - "", - self::MATHML_NAMESPACE => "math", - self::SVG_NAMESPACE => "svg", - self::XLINK_NAMESPACE => "xlink", - self::XML_NAMESPACE => "xml", - self::XMLNS_NAMESPACE => "xmlns", - ]; - - public static function parse(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): \DOMDocument { - // Initialize the various classes needed for parsing - $document = $document ?? new \DOMDocument; - if ((error_reporting() & \E_USER_WARNING)) { - $errorHandler = new ParseError; - } else { - $errorHandler = new ParseErrorDummy; - } - $decoder = new Data($data, $file ?? 
"STDIN", $errorHandler, $encodingOrContentType); - $document->documentEncoding = $decoder->encoding; - $stack = new OpenElementsStack($fragmentContext); - $tokenizer = new Tokenizer($decoder, $stack, $errorHandler); - $tokenList = $tokenizer->tokenize(); - $treeBuilder = new TreeBuilder($document, $decoder, $tokenizer, $tokenList, $errorHandler, $stack, new TemplateInsertionModesStack, $fragmentContext); - // Override error handling - $errorHandler->setHandler(); - try { - // run the parser to completion - $treeBuilder->constructTree(); - } finally { - // Restore error handling - $errorHandler->clearHandler(); - } - return $document; - } - - public static function parseFragment(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?String $file = null): DocumentFragment { - // Create the requisite parsing context if none was supplied - $document = $document ?? new \DOMDocument; - $tempDocument = new \DOMDocument; - $fragmentContext = $fragmentContext ?? $document->createElement("div"); - // parse the fragment into the temporary document - self::parse($data, $tempDocument, $encodingOrContentType, $fragmentContext, $file); - // extract the nodes from the temp document into a fragment - $fragment = $document->createDocumentFragment(); - foreach ($tempDocument->documentElement->childNodes as $node) { - $node = $document->importNode($node, true); - $fragment->appendChild($node); - } - return $fragment; - } - - public static function fetchFile(string $file, ?string $encodingOrContentType = null): ?array { - $f = fopen($file, "r"); - if (!$f) { - return null; - } - $data = stream_get_contents($f); - $encoding = Charset::fromCharset((string) $encodingOrContentType) ?? 
Charset::fromTransport((string) $encodingOrContentType); - if (!$encoding) { - $meta = stream_get_meta_data($f); - if ($meta['wrapper_type'] === "http") { - // Try to find a Content-Type header-field - foreach ($meta['wrapper_data'] as $h) { - $h = explode(":", $h, 2); - if (count($h) === 2) { - if (preg_match("/^\s*Content-Type\s*$/i", $h[0])) { - // Try to get an encoding from it - $encoding = Charset::fromTransport($h[1]); - break; - } - } - } - } - } - return [$data, $encoding]; - } -} diff --git a/lib/DOM/ProcessingInstruction.php b/lib/ProcessingInstruction.php similarity index 89% rename from lib/DOM/ProcessingInstruction.php rename to lib/ProcessingInstruction.php index 878dafc..7d34e5b 100644 --- a/lib/DOM/ProcessingInstruction.php +++ b/lib/ProcessingInstruction.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class ProcessingInstruction extends \DOMProcessingInstruction { use LeafNode, Moonwalk, ToString; diff --git a/lib/Stack.php b/lib/Stack.php deleted file mode 100644 index 01b77b2..0000000 --- a/lib/Stack.php +++ /dev/null @@ -1,62 +0,0 @@ -= 0, new Exception(Exception::STACK_INVALID_INDEX, $offset)); - - if ($offset === null) { - $this->_storage[] = $value; - } else { - $this->_storage[$offset] = $value; // @codeCoverageIgnore - } - $this->count = count($this->_storage); - } - - public function offsetExists($offset) { - return isset($this->_storage[$offset]); - } - - public function offsetUnset($offset) { - assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset)); - array_splice($this->_storage, $offset, 1, []); - $this->count = count($this->_storage); - } - - public function offsetGet($offset) { - assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset)); - return $this->_storage[$offset]; - } - - public function count(): int { - return 
$this->count; - } - - public function getIterator(): \Traversable { - for ($a = $this->count - 1; $a > -1; $a--) { - yield $a => $this->_storage[$a]; - } - } - - public function pop() { - $this->count = max($this->count - 1, 0); - return array_pop($this->_storage); - } - - public function isEmpty(): bool { - return !$this->_storage; - } - - public function top(int $offset = 0) { - assert($offset >= 0, new Exception(Exception::STACK_INVALID_OFFSET, '<= 0')); - return ($c = $this->count) > $offset ? $this->_storage[$c - ($offset + 1)] : null; - } -} diff --git a/lib/DOM/TemplateElement.php b/lib/TemplateElement.php similarity index 97% rename from lib/DOM/TemplateElement.php rename to lib/TemplateElement.php index d219ddc..51db140 100644 --- a/lib/DOM/TemplateElement.php +++ b/lib/TemplateElement.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; /** Class specifically for template elements to handle its content property. */ class TemplateElement extends Element { diff --git a/lib/TemplateInsertionModesStack.php b/lib/TemplateInsertionModesStack.php deleted file mode 100644 index 342e3f3..0000000 --- a/lib/TemplateInsertionModesStack.php +++ /dev/null @@ -1,19 +0,0 @@ -isEmpty() ? 
null : $this->top(); - default: - return null; // @codeCoverageIgnore - } - } -} diff --git a/lib/DOM/Text.php b/lib/Text.php similarity index 88% rename from lib/DOM/Text.php rename to lib/Text.php index 3736a8e..7feb404 100644 --- a/lib/DOM/Text.php +++ b/lib/Text.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class Text extends \DOMText { use LeafNode, Moonwalk, ToString; diff --git a/lib/Token.php b/lib/Token.php deleted file mode 100644 index 267aea1..0000000 --- a/lib/Token.php +++ /dev/null @@ -1,120 +0,0 @@ -data = $data; - } -} - -class DOCTYPEToken extends Token { - public const NAME = "DOCTYPE token"; - - # DOCTYPE tokens have a name, a public identifier, - # a system identifier, and a force-quirks flag. - # When a DOCTYPE token is created, its name, - # public identifier, and system identifier must - # be marked as missing (which is a distinct state - # from the empty string), and the force-quirks flag - # must be set to off (its other state is on). - public $forceQuirks = false; - public $name; - public $public; - public $system; - - public function __construct(?string $name = null, ?string $public = null, ?string $system = null) { - // null stands in for the distinct "missing" state - $this->name = $name; - $this->public = $public; - $this->system = $system; - } -} - -class CharacterToken extends DataToken { - public const NAME = "Character token"; -} - -class WhitespaceToken extends CharacterToken {} - -class NullCharacterToken extends CharacterToken {} - -class CommentToken extends DataToken { - public const NAME = "Comment token"; - - public function __construct(string $data = '') { - parent::__construct($data); - } -} - -abstract class TagToken extends Token { - # Start and end tag tokens have a tag name, - # a self-closing flag, and a list of attributes, - # each of which has a name and a value. 
- # When a start or end tag token is created, its - # self-closing flag must be unset (its other state - # is that it be set), and its attributes list must be empty. - public $name; - public $namespace; - public $selfClosing; - public $selfClosingAcknowledged = false; - public $attributes = []; - - public function __construct(string $name, bool $selfClosing = false, ?string $namespace = null) { - $this->selfClosing = $selfClosing; - $this->namespace = $namespace; - $this->name = $name; - } - - public function hasAttribute(string $name): bool { - return ($this->_getAttributeKey($name) !== null); - } - - public function getAttribute(string $name) { - $key = $this->_getAttributeKey($name); - return (isset($this->attributes[$key])) ? $this->attributes[$key] : null; - } - - private function _getAttributeKey(string $name) { - foreach ($this->attributes as $key => $a) { - if ($a->name === $name) { - return $key; - } - } - return null; - } -} - -class StartTagToken extends TagToken { - public const NAME = "Start tag token"; -} - -class EndTagToken extends TagToken { - public const NAME = "End tag token"; -} - -class EOFToken extends Token { - public const NAME = "EOF token"; -} - -class TokenAttr { - /** @var string The name of the attribute */ - public $name; - /** @var string The attribute's value */ - public $value; - /** @var string|null The attribute's namespace. 
This is normally null but may be set during tree construction */ - public $namespace = null; - - public function __construct(string $name, string $value) { - $this->name = $name; - $this->value = $value; - } -} diff --git a/lib/DOM/TokenList.php b/lib/TokenList.php similarity index 99% rename from lib/DOM/TokenList.php rename to lib/TokenList.php index aa8de20..710e4da 100644 --- a/lib/DOM/TokenList.php +++ b/lib/TokenList.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; class TokenList implements \ArrayAccess, \Countable, \Iterator { use MagicProperties; diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php deleted file mode 100644 index 47187a6..0000000 --- a/lib/Tokenizer.php +++ /dev/null @@ -1,3699 +0,0 @@ - "Data", - self::RCDATA_STATE => "RCDATA", - self::RAWTEXT_STATE => "RAWTEXT", - self::SCRIPT_DATA_STATE => "Script data", - self::PLAINTEXT_STATE => "PLAINTEXT", - self::TAG_OPEN_STATE => "Tag open", - self::END_TAG_OPEN_STATE => "End tag open", - self::TAG_NAME_STATE => "Tag name", - self::RCDATA_LESS_THAN_SIGN_STATE => "RCDATA less-than sign", - self::RCDATA_END_TAG_OPEN_STATE => "RCDATA end tag open", - self::RCDATA_END_TAG_NAME_STATE => "RCDATA end tag name", - self::RAWTEXT_LESS_THAN_SIGN_STATE => "RAWTEXT less than sign", - self::RAWTEXT_END_TAG_OPEN_STATE => "RAWTEXT end tag open", - self::RAWTEXT_END_TAG_NAME_STATE => "RAWTEXT end tag name", - self::SCRIPT_DATA_LESS_THAN_SIGN_STATE => "Script data less-than sign", - self::SCRIPT_DATA_END_TAG_OPEN_STATE => "Script data end tag open", - self::SCRIPT_DATA_END_TAG_NAME_STATE => "Script data end tag name", - self::SCRIPT_DATA_ESCAPE_START_STATE => "Script data escape start", - self::SCRIPT_DATA_ESCAPE_START_DASH_STATE => "Script data escape start dash", - self::SCRIPT_DATA_ESCAPED_STATE => "Script data escaped", - self::SCRIPT_DATA_ESCAPED_DASH_STATE => "Script data escaped dash", - 
self::SCRIPT_DATA_ESCAPED_DASH_DASH_STATE => "Script data escaped dash dash", - self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE => "Script data escaped less-than sign", - self::SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE => "Script data escaped end tag open", - self::SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE => "Script data escaped end tag name", - self::SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE => "Script data double escape start", - self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE => "Script data double escaped", - self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE => "Script data double escaped dash", - self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE => "Script data double escaped dash dash", - self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE => "Script data double escaped less-than sign", - self::SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE => "Script data double escape end", - self::BEFORE_ATTRIBUTE_NAME_STATE => "Before attribute", - self::ATTRIBUTE_NAME_STATE => "Attribute name", - self::AFTER_ATTRIBUTE_NAME_STATE => "After attribute name", - self::BEFORE_ATTRIBUTE_VALUE_STATE => "Before attribute value", - self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE => "Attribute value (double quoted)", - self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE => "Attribute value (single quoted)", - self::ATTRIBUTE_VALUE_UNQUOTED_STATE => "Attribute value (unquoted)", - self::AFTER_ATTRIBUTE_VALUE_QUOTED_STATE => "After attribute value (quoted)", - self::SELF_CLOSING_START_TAG_STATE => "Self-closing start tag", - self::BOGUS_COMMENT_STATE => "Bogus comment", - self::MARKUP_DECLARATION_OPEN_STATE => "Markup declaration open", - self::COMMENT_START_STATE => "Comment start", - self::COMMENT_START_DASH_STATE => "Comment start dash", - self::COMMENT_STATE => "Comment", - self::COMMENT_LESS_THAN_SIGN_STATE => "Comment less-than sign", - self::COMMENT_LESS_THAN_SIGN_BANG_STATE => "Comment less-than sign bang", - self::COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE => "Comment less-than sign bang dash", - 
self::COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE => "Comment less-than sign bang dash dash", - self::COMMENT_END_DASH_STATE => "Comment end dash", - self::COMMENT_END_STATE => "Comment end", - self::COMMENT_END_BANG_STATE => "Comment end bang", - self::DOCTYPE_STATE => "DOCTYPE", - self::BEFORE_DOCTYPE_NAME_STATE => "Before DOCTYPE name", - self::DOCTYPE_NAME_STATE => "DOCTYPE name", - self::AFTER_DOCTYPE_NAME_STATE => "After DOCTYPE name", - self::AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE => "After DOCTYPE public keyword", - self::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE => "Before DOCTYPE public identifier", - self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => "DOCTYPE public identifier (double quoted)", - self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => "DOCTYPE public identifier (single quoted)", - self::AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE => "After DOCTYPE public identifier", - self::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE => "Between DOCTYPE public and system identifiers", - self::AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE => "After DOCTYPE system keyword", - self::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE => "Before DOCTYPE system identifier", - self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => "DOCTYPE system identifier (double-quoted)", - self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => "DOCTYPE system identifier (single-quoted)", - self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE => "After DOCTYPE system identifier", - self::BOGUS_DOCTYPE_STATE => "Bogus DOCTYPE", - self::CDATA_SECTION_STATE => "CDATA section", - self::CDATA_SECTION_BRACKET_STATE => "CDATA section bracket", - self::CDATA_SECTION_END_STATE => "CDATA section end", - self::CHARACTER_REFERENCE_STATE => "Character reference", - self::NAMED_CHARACTER_REFERENCE_STATE => "Named character reference", - self::AMBIGUOUS_AMPERSAND_STATE => "Ambiguous ampersand", - self::NUMERIC_CHARACTER_REFERENCE_STATE => "Numeric character reference", - self::HEXADECIMAL_CHARACTER_REFERENCE_START_STATE 
=> "Hexadecimal character reference start", - self::DECIMAL_CHARACTER_REFERENCE_START_STATE => "Decimal character reference start", - self::HEXADECIMAL_CHARACTER_REFERENCE_STATE => "Hexadecimal character reference", - self::DECIMAL_CHARACTER_REFERENCE_STATE => "Decimal character reference", - self::NUMERIC_CHARACTER_REFERENCE_END_STATE => "Numeric character reference", - ]; - - const ATTRIBUTE_VALUE_STATE_SET = [ - # A character reference is said to be consumed as part of an attribute - # if the return state is either attribute value (double-quoted) state, - # attribute value (single-quoted) state or attribute value (unquoted) state. - self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE, - self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE, - self::ATTRIBUTE_VALUE_UNQUOTED_STATE - ]; - - // Ctype constants - const CTYPE_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - const CTYPE_ALPHA = self::CTYPE_UPPER.'abcdefghijklmnopqrstuvwxyz'; - const CTYPE_NUM = '0123456789'; - const CTYPE_ALNUM = self::CTYPE_ALPHA.self::CTYPE_NUM; - const CTYPE_HEX = self::CTYPE_NUM.'ABCDEFabcdef'; - - public function __construct(Data $data, OpenElementsStack $stack, ParseError $errorHandler) { - $this->state = self::DATA_STATE; - $this->data = $data; - $this->stack = $stack; - $this->errorHandler = $errorHandler; - } - - protected function sanitizeTag(TagToken $token): void { - if ($token instanceof EndTagToken) { - # When an end tag token is emitted with attributes, - # that is an end-tag-with-attributes parse error. - if ($token->attributes) { - $this->error(ParseError::END_TAG_WITH_ATTRIBUTES); - $token->attributes = []; - } - # When an end tag token is emitted with its self-closing - # flag set, that is an end-tag-with-trailing-solidus parse error. 
- if ($token->selfClosing) { - $this->error(ParseError::END_TAG_WITH_TRAILING_SOLIDUS); - $token->selfClosing = false; - } - } - - } - - protected function keepOrDiscardAttribute(TagToken $token, TokenAttr $attribute): void { - // See 13.2.5.33 Attribute name state - - # When the user agent leaves the attribute name state - # (and before emitting the tag token, if appropriate), - # the complete attribute's name must be compared to the - # other attributes on the same token; if there is already - # an attribute on the token with the exact same name, - # then this is a duplicate-attribute parse error and the - # new attribute must be removed from the token. - if ($token->hasAttribute($attribute->name)) { - $this->error(ParseError::DUPLICATE_ATTRIBUTE, $attribute->name); - } else { - $token->attributes[] = $attribute; - } - } - - public function tokenize(): \Generator { - Consume: - assert((function() { - $this->debugLog .= "TOKEN ".++$this->debugCount."\n"; - return true; - })()); - - while (true) { - // OPTIMIZATION: All but one state consumes; we instead do so - // here unless the state is the exception; this allows us to - // reconsume more efficiently when needed - if ($this->state !== self::MARKUP_DECLARATION_OPEN_STATE) { - $char = $this->data->consume(); - } - Reconsume: - - assert((function() use ($char) { - $state = self::STATE_NAMES[$this->state] ?? $this->state; - $this->debugLog .= " State: $state ($char)\n"; - return true; - })()); - - # 13.2.5.1 Data state - if ($this->state === self::DATA_STATE) { - # Consume the next input character - - # U+0026 AMPERSAND (&) - if ($char === '&') { - # Set the return state to the data state. - # Switch to the character reference state. 
- - // DEVIATION: Character reference consumption implemented as a function - $outChar = $this->switchToCharacterReferenceState(self::DATA_STATE); - if (strspn($outChar, Data::WHITESPACE)) { - yield new WhitespaceToken($outChar); // a character reference is either all whitespace is no whitespace - } else { - yield new CharacterToken($outChar); - } - } - # U+003C LESS-THAN SIGN (<) - elseif ($char === '<') { - # Switch to the tag open state. - $this->state = self::TAG_OPEN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit the current input character as a character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new NullCharacterToken($char); - } - # EOF - elseif ($char === '') { - # Emit an end-of-file token. - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that don't match what is above and emit - // that as a character token instead to prevent having to loop back - // through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("&<\0")); - } - } - } - - # 13.2.5.2 RCDATA state - elseif ($this->state === self::RCDATA_STATE) { - # Consume the next input character - - # U+0026 AMPERSAND (&) - if ($char === '&') { - # Set the return state to the RCDATA state. - # Switch to the character reference state. 
- - // DEVIATION: Character reference consumption implemented as a function - $outChar = $this->switchToCharacterReferenceState(self::RCDATA_STATE); - if (strspn($outChar, Data::WHITESPACE)) { - yield new WhitespaceToken($outChar); // a character reference is either all whitespace is no whitespace - } else { - yield new CharacterToken($outChar); - } - } - # U+003C LESS-THAN SIGN (<) - elseif ($char === '<') { - # Switch to the RCDATA less-than sign state. - $this->state = self::RCDATA_LESS_THAN_SIGN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # Emit an end-of-file token. - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that don't match what is above and emit - // that as a character token instead to prevent having to loop back - // through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("&<\0")); - } - } - } - - # 13.2.5.3 RAWTEXT state - elseif ($this->state === self::RAWTEXT_STATE) { - # Consume the next input character - - # U+003C LESS-THAN SIGN (<) - if ($char === '<') { - # Switch to the RAWTEXT less-than sign state. - $this->state = self::RAWTEXT_LESS_THAN_SIGN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # Emit an end-of-file token. 
- yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that don't match what is above and emit - // that as a character token instead to prevent having to loop back - // through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("<\0")); - } - } - } - - # 13.2.5.4 Script data state - elseif ($this->state === self::SCRIPT_DATA_STATE) { - # Consume the next input character - - # U+003C LESS-THAN SIGN (<) - if ($char === '<') { - # Switch to the script data less-than sign state. - $this->state = self::SCRIPT_DATA_LESS_THAN_SIGN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # Emit an end-of-file token. - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that don't match what is above and emit - // that as a character token instead to prevent having to loop back - // through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("<\0")); - } - } - } - - # 13.2.5.5 PLAINTEXT state - elseif ($this->state === self::PLAINTEXT_STATE) { - # Consume the next input character - - # U+0000 NULL - if ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. 
- $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # Emit an end-of-file token. - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that don't match what is above and emit - // that as a character token instead to prevent having to loop back - // through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("\0")); - } - } - } - - # 13.2.5.6 Tag open state - elseif ($this->state === self::TAG_OPEN_STATE) { - # Consume the next input character - - # U+0021 EXCLAMATION MARK (!) - if ($char === '!') { - # Switch to the markup declaration open state. - $this->state = self::MARKUP_DECLARATION_OPEN_STATE; - } - # U+002F SOLIDUS (/) - elseif ($char === '/') { - # Switch to the end tag open state. - $this->state = self::END_TAG_OPEN_STATE; - } - # ASCII alpha - elseif (ctype_alpha($char)) { - # Create a new start tag token, set its tag name to the empty string. - # Reconsume in the tag name state. - - // OPTIMIZATION: - // Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $token = new StartTagToken(strtolower($char.$this->data->consumeWhile(self::CTYPE_ALPHA))); - $this->state = self::TAG_NAME_STATE; - } - # U+003F QUESTION MARK (?) - elseif ($char === '?') { - # This is an unexpected-question-mark-instead-of-tag-name parse error. - # Create a comment token whose data is the empty string. - # Reconsume in the bogus comment state. 
- $this->error(ParseError::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME); - $token = new CommentToken(''); - $this->state = self::BOGUS_COMMENT_STATE; - goto Reconsume; - } - # EOF - elseif ($char === '') { - # This is an eof-before-tag-name parse error. - # Emit a U+003C LESS-THAN SIGN character token and an end-of-file token. - $this->error(ParseError::EOF_BEFORE_TAG_NAME); - yield new CharacterToken('<'); - yield new EOFToken; - return; - } - # Anything else - else { - # This is an invalid-first-character-of-tag-name parse error. - # Emit a U+003C LESS-THAN SIGN character token. - # Reconsume in the data state. - $this->error(ParseError::INVALID_FIRST_CHARACTER_OF_TAG_NAME, $char); - $this->state = self::DATA_STATE; - yield new CharacterToken('<'); - goto Reconsume; - } - } - - # 13.2.5.7 End tag open state - elseif ($this->state === self::END_TAG_OPEN_STATE) { - # Consume the next input character - - # ASCII alpha - if (ctype_alpha($char)) { - # Create a new end tag token, set its tag name to the empty string. - # Reconsume in the tag name state. - - // OPTIMIZATION: - // Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $token = new EndTagToken(strtolower($char.$this->data->consumeWhile(self::CTYPE_ALPHA))); - $this->state = self::TAG_NAME_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-end-tag-name parse error. - # Switch to the data state. - $this->error(ParseError::MISSING_END_TAG_NAME); - $this->state = self::DATA_STATE; - } - # EOF - elseif ($char === '') { - # This is an eof-before-tag-name parse error. - # Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token. - // Making errors more expressive. 
- $this->error(ParseError::EOF_BEFORE_TAG_NAME); - yield new CharacterToken('error(ParseError::INVALID_FIRST_CHARACTER_OF_TAG_NAME, $char); - $token = new CommentToken(); - $this->state = self::BOGUS_COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.8 Tag name state - elseif ($this->state === self::TAG_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Switch to the before attribute name state. - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } - # "/" (U+002F) - elseif ($char === '/') { - # Switch to the self-closing start tag state. - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. Emit the current tag token. - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # Uppercase ASCII letter - elseif (ctype_upper($char)) { - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the current - # tag token's tag name. - - // OPTIMIZATION: - // Consume all characters that are Uppercase ASCII characters to - // prevent having to loop back through here every single time. - $token->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_UPPER)); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to - # the current tag token's tag name. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->name .= "\u{FFFD}"; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the current tag token's tag name. 
- - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->name .= $char.$this->data->consumeUntil("\0\t\n\x0c />".self::CTYPE_UPPER); - } - } - - # 13.2.5.9 RCDATA less-than sign state - elseif ($this->state === self::RCDATA_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # "/" (U+002F) - if ($char === '/') { - # Set the temporary buffer to the empty string. - # Switch to the RCDATA end tag open state. - $this->temporaryBuffer = ''; - $this->state = self::RCDATA_END_TAG_OPEN_STATE; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token. - # Reconsume in the RCDATA state. - $this->state = self::RCDATA_STATE; - yield new CharacterToken('<'); - goto Reconsume; - } - } - - # 13.2.5.10 RCDATA end tag open state - elseif ($this->state === self::RCDATA_END_TAG_OPEN_STATE) { - # Consume the next input character - - # ASCII alpha - if (ctype_alpha($char)) { - # Create a new end tag token, set its tag name to the empty string. - # Reconsume in the RCDATA end tag name state. - $token = new EndTagToken(""); - $this->state = self::RCDATA_END_TAG_NAME_STATE; - goto Reconsume; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS character token. - # Reconsume in the RCDATA state. - $this->state = self::RCDATA_STATE; - yield new CharacterToken('state === self::RCDATA_END_TAG_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # If the current end tag token is an appropriate end tag token, then switch to the - # before attribute name state. Otherwise, treat it as per the "anything else" - # entry below. 
- if ($token->name === $this->stack->currentNodeName) { - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } else { - goto RCDATA_end_tag_name_state_anything_else; - } - } - # "/" (U+002F) - elseif ($char === '/') { - # If the current end tag token is an appropriate end tag token, then switch to the - # self-closing start tag state. Otherwise, treat it as per the "anything else" - # entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } else { - goto RCDATA_end_tag_name_state_anything_else; - } - } - # ">" (U+003E) - elseif ($char === '>') { - # If the current end tag token is an appropriate end tag token, then switch to the - # data state and emit the current tag token. Otherwise, treat it as per the - # "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } else { - goto RCDATA_end_tag_name_state_anything_else; - } - } - # ASCII upper alpha - # ASCII lower alpha - elseif (ctype_alpha($char)) { - # Uppercase: - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. - # Lowercase: - # Append the current input character to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. - - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. 
- $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); - $token->name .= strtolower($char); - $this->temporaryBuffer .= $char; - } - # Anything else - else { - RCDATA_end_tag_name_state_anything_else: - # Emit a U+003C LESS-THAN SIGN character token, - # a U+002F SOLIDUS character token, and a character - # token for each of the characters in the temporary - # buffer (in the order they were added to the buffer). - # Reconsume in the RCDATA state. - $this->state = self::RCDATA_STATE; - yield new CharacterToken('temporaryBuffer); - goto Reconsume; - } - } - - # 13.2.5.12 RAWTEXT less-than sign state - elseif ($this->state === self::RAWTEXT_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # "/" (U+002F) - if ($char === '/') { - # Set the temporary buffer to the empty string. - # Switch to the RAWTEXT end tag open state. - $this->temporaryBuffer = ''; - $this->state = self::RAWTEXT_END_TAG_OPEN_STATE; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token. - # Reconsume in the RAWTEXT state. - $this->state = self::RAWTEXT_STATE; - yield new CharacterToken('<'); - goto Reconsume; - } - } - - # 13.2.5.13 RAWTEXT end tag open state - elseif ($this->state === self::RAWTEXT_END_TAG_OPEN_STATE) { - # Consume the next input character - - # ASCII alpha - if (ctype_alpha($char)) { - # Create a new end tag token, set its tag name to the empty string. - # Reconsume in the RAWTEXT end tag name state. - $token = new EndTagToken(""); - $this->state = self::RAWTEXT_END_TAG_NAME_STATE; - goto Reconsume; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS character token. - # Reconsume in the RAWTEXT state. 
- $this->state = self::RAWTEXT_STATE; - yield new CharacterToken('state === self::RAWTEXT_END_TAG_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # If the current end tag token is an appropriate end tag token, - # then switch to the before attribute name state. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } else { - goto RAWTEXT_end_tag_name_state_anything_else; - } - } - # "/" (U+002F) - elseif ($char === '/') { - # If the current end tag token is an appropriate end tag token, - # then switch to the self-closing start tag state. - # Otherwise, treat it as per the "anything else" - # entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } else { - goto RAWTEXT_end_tag_name_state_anything_else; - } - } - # ">" (U+003E) - elseif ($char === '>') { - # If the current end tag token is an appropriate end tag token, - # then switch to the data state and emit the current tag token. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } else { - goto RAWTEXT_end_tag_name_state_anything_else; - } - } - # ASCII upper alpha - # ASCII lower apha - elseif (ctype_alpha($char)) { - # Uppercase: - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. - # Lowercase: - # Append the current input character to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. 
- - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); - $token->name .= strtolower($char); - $this->temporaryBuffer .= $char; - } - # Anything else - else { - RAWTEXT_end_tag_name_state_anything_else: - # Emit a U+003C LESS-THAN SIGN character token, - # a U+002F SOLIDUS character token, and a character - # token for each of the characters in the temporary - # buffer (in the order they were added to the buffer). - # Reconsume in the RAWTEXT state. - $this->state = self::RAWTEXT_STATE; - yield new CharacterToken('temporaryBuffer); - goto Reconsume; - } - } - - # 13.2.5.15 Script data less-than sign state - elseif ($this->state === self::SCRIPT_DATA_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # "/" (U+002F) - if ($char === '/') { - # Set the temporary buffer to the empty string. - # Switch to the script data end tag open state. - $this->temporaryBuffer = ''; - $this->state = self::SCRIPT_DATA_END_TAG_OPEN_STATE; - } - # "!" (U+0021) - elseif ($char === '!') { - # Switch to the script data escape start state. - # Emit a U+003C LESS-THAN SIGN character token - # and a U+0021 EXCLAMATION MARK character token. - $this->state = self::SCRIPT_DATA_ESCAPE_START_STATE; - yield new CharacterToken('state = self::SCRIPT_DATA_STATE; - yield new CharacterToken('<'); - goto Reconsume; - } - } - - # 13.2.5.16 Script data end tag open state - elseif ($this->state === self::SCRIPT_DATA_END_TAG_OPEN_STATE) { - # Consume the next input character - - # ASCII alpha - if (ctype_alpha($char)) { - # Create a new end tag token, set its tag name to the empty string. - # Reconsume in the script data end tag name state. 
- $token = new EndTagToken(""); - $this->state = self::SCRIPT_DATA_END_TAG_NAME_STATE; - goto Reconsume; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS character token. - # Reconsume in the script data state. - $this->state = self::SCRIPT_DATA_STATE; - yield new CharacterToken('state === self::SCRIPT_DATA_END_TAG_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # If the current end tag token is an appropriate end tag token, - # then switch to the before attribute name state. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } else { - goto script_data_end_tag_name_state_anything_else; - } - } - # "/" (U+002F) - elseif ($char === '/') { - # If the current end tag token is an appropriate end tag token, - # then switch to the self-closing start tag state. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } else { - goto script_data_end_tag_name_state_anything_else; - } - } - # ">" (U+003E) - elseif ($char === '>') { - # If the current end tag token is an appropriate end tag token, - # then switch to the data state and emit the current tag token. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } else { - goto script_data_end_tag_name_state_anything_else; - } - } - # ASCII upper alpha - # ASCII lower alpha - elseif (ctype_alpha($char)) { - # Uppercase: - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the current - # tag token's tag name. 
- # Append the current input character to the temporary buffer. - # Lowercase: - # Append the current input character to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. - - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $char = $char.$this->data->consumeWhile(self::CTYPE_ALPHA); - $token->name .= strtolower($char); - $this->temporaryBuffer .= $char; - } - # Anything else - else { - script_data_end_tag_name_state_anything_else: - # Emit a U+003C LESS-THAN SIGN character token, - # a U+002F SOLIDUS character token, and a character - # token for each of the characters in the temporary - # buffer (in the order they were added to the buffer). - # Reconsume in the script data state. - $this->state = self::SCRIPT_DATA_STATE; - yield new CharacterToken('temporaryBuffer); - goto Reconsume; - } - } - - # 13.2.5.18 Script data escape start state - elseif ($this->state === self::SCRIPT_DATA_ESCAPE_START_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data escape start dash state. - # Emit a U+002D HYPHEN-MINUS character token. - $this->state = self::SCRIPT_DATA_ESCAPE_START_DASH_STATE; - yield new CharacterToken('-'); - } - # Anything else - else { - # Switch to the script data state. - # Reconsume the current input character. - $this->state = self::SCRIPT_DATA_STATE; - goto Reconsume; - } - } - - # 13.2.5.19 Script data escape start dash state - elseif ($this->state === self::SCRIPT_DATA_ESCAPE_START_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data escaped dash dash state. - # Emit a U+002D HYPHEN-MINUS character token. 
- $this->state = self::SCRIPT_DATA_ESCAPED_DASH_DASH_STATE; - yield new CharacterToken('-'); - } - # Anything else - else { - # Reconsume in the script data state. - $this->state = self::SCRIPT_DATA_STATE; - goto Reconsume; - } - } - - # 13.2.5.20 Script data escaped state - elseif ($this->state === self::SCRIPT_DATA_ESCAPED_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data escaped dash state. - # Emit a U+002D HYPHEN-MINUS character token. - $this->state = self::SCRIPT_DATA_ESCAPED_DASH_STATE; - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data escaped less-than sign state. - $this->state = self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("-<\0")); - } - } - } - - # 13.2.5.21 Script data escaped dash state - elseif ($this->state === self::SCRIPT_DATA_ESCAPED_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data escaped dash dash state. 
- # Emit a U+002D HYPHEN-MINUS character token. - $this->state = self::SCRIPT_DATA_ESCAPED_DASH_DASH_STATE; - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data escaped less-than sign state. - $this->state = self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Switch to the script data escaped state. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Switch to the script data escaped state. - # Emit the current input character as a character token. - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - } - - # 13.2.5.22 Script data escaped dash dash state - elseif ($this->state === self::SCRIPT_DATA_ESCAPED_DASH_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Emit a U+002D HYPHEN-MINUS character token. - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data escaped less-than sign state. - $this->state = self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the script data state. - # Emit a U+003E GREATER-THAN SIGN character token. - $this->state = self::SCRIPT_DATA_STATE; - yield new CharacterToken('>'); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. 
- # Switch to the script data escaped state. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Switch to the script data escaped state. - # Emit the current input character as a character token. - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - } - - # 13.2.5.23 Script data escaped less-than sign state - elseif ($this->state === self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # "/" (U+002F) - if ($char === '/') { - # Set the temporary buffer to the empty string. - # Switch to the script data escaped end tag open state. - $this->temporaryBuffer = ''; - $this->state = self::SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE; - } - # ASCII alpha - elseif (ctype_alpha($char)) { - # Set the temporary buffer to the empty string. - # Emit a U+003C LESS-THAN SIGN character token. - # Reconsume in the script data double escape start state. - - $this->temporaryBuffer = ''; - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE; - yield new CharacterToken('<'); - goto Reconsume; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token. - # Reconsume in the script data escaped state. 
- $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - yield new CharacterToken("<"); - goto Reconsume; - } - } - - # 13.2.5.24 Script data escaped end tag open state - elseif ($this->state === self::SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE) { - # Consume the next input character - - # ASCII alpha - if (ctype_alpha($char)) { - # Create a new end tag token, set its tag name to the empty string. - # Reconsume in the script data escaped end tag name state. - - // OPTIMIZATION: Avoid reconsuming - // Set the tag name to the lowercase - // Append the original to the temporary buffer - $token = new EndTagToken(strtolower($char)); - $this->temporaryBuffer = $char; - $this->state = self::SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE; - } - # Anything else - else { - # Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS character token. - # Reconsume in the script data escaped state. - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - yield new CharacterToken('state === self::SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # If the current end tag token is an appropriate end tag token, - # then switch to the before attribute name state. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } else { - goto script_data_escaped_end_tag_name_state_anything_else; - } - } - # "/" (U+002F) - elseif ($char === '/') { - # If the current end tag token is an appropriate end tag token, - # then switch to the self-closing start tag state. - # Otherwise, treat it as per the "anything else" entry below. 
- if ($token->name === $this->stack->currentNodeName) { - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } else { - goto script_data_escaped_end_tag_name_state_anything_else; - } - } - # ">" (U+003E) - elseif ($char === '>') { - # If the current end tag token is an appropriate end tag token, - # then switch to the data state and emit the current tag token. - # Otherwise, treat it as per the "anything else" entry below. - if ($token->name === $this->stack->currentNodeName) { - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } else { - goto script_data_escaped_end_tag_name_state_anything_else; - } - } - # ASCII upper alpha - # ASCII lower alpha - elseif (ctype_alpha($char)) { - # Uppercase: - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the current - # tag token's tag name. - # Append the current input character to the temporary buffer. - # Lowercase: - # Append the current input character to the current tag - # token's tag name. - # Append the current input character to the temporary buffer. - - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); - $token->name .= strtolower($char); - $this->temporaryBuffer .= $char; - } - # Anything else - else { - script_data_escaped_end_tag_name_state_anything_else: - # Emit a U+003C LESS-THAN SIGN character token, - # a U+002F SOLIDUS character token, and a character token - # for each of the characters in the temporary buffer - # (in the order they were added to the buffer). - # Reconsume in the script data escaped state. 
- $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - yield new CharacterToken('temporaryBuffer); - goto Reconsume; - } - } - - # 13.2.5.26 Script data double escape start state - elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE) { - # Consume the next input character - - # U+0009 CHARACTER TABULATION (tab) - # U+000A LINE FEED (LF) - # U+000C FORM FEED (FF) - # U+0020 SPACE - # U+002F SOLIDUS (/) - # U+003E GREATER-THAN SIGN (>) - if (strspn($char, " />\t\n\x0C")) { - # If the temporary buffer is the string "script", - # then switch to the script data double escaped state. - # Otherwise, switch to the script data escaped state. - # Emit the current input character as a character token. - if ($this->temporaryBuffer === 'script') { - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - } else { - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - } - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - # ASCII upper alpha - # ASCII lower alpha - elseif (ctype_alpha($char)) { - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the temporary buffer. - # Emit the current input character as a character token. - - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: - // Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $char = $char.$this->data->consumeWhile(self::CTYPE_ALPHA); - $this->temporaryBuffer .= strtolower($char); - yield new CharacterToken($char); - } - # Anything else - else { - # Reconsume in the script data escaped state. 
- $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - goto Reconsume; - } - } - - # 13.2.5.27 Script data double escaped state - elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data double escaped dash state. - # Emit a U+002D HYPHEN-MINUS character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE; - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data double escaped less-than sign state. - # Emit a U+003C LESS-THAN SIGN character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE; - yield new CharacterToken('<'); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("-<\0")); - } - } - } - - # 13.2.5.28 Script data double escaped dash state - elseif ($this->state == self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the script data double escaped dash dash state. - # Emit a U+002D HYPHEN-MINUS character token. 
- $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE; - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data double escaped less-than sign state. - # Emit a U+003C LESS-THAN SIGN character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE; - yield new CharacterToken('<'); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Switch to the script data double escaped state. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Switch to the script data double escaped state. - # Emit the current input character as a character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - } - - # 13.2.5.29 Script data double escaped dash dash state - elseif ($this->state == self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Emit a U+002D HYPHEN-MINUS character token. - yield new CharacterToken('-'); - } - # "<" (U+003C) - elseif ($char === '<') { - # Switch to the script data double escaped less-than sign state. - # Emit a U+003C LESS-THAN SIGN character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE; - yield new CharacterToken('<'); - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the script data state. 
- # Emit a U+003E GREATER-THAN SIGN character token. - $this->state = self::SCRIPT_DATA_STATE; - yield new CharacterToken('>'); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Switch to the script data double escaped state. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - yield new CharacterToken("\u{FFFD}"); - } - # EOF - elseif ($char === '') { - # This is an eof-in-script-html-comment-like-text parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT); - yield new EOFToken; - return; - } - # Anything else - else { - # Switch to the script data double escaped state. - # Emit the current input character as a character token. - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - } - - # 13.2.5.30 Script data double escaped less-than sign state - elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # "/" (U+002F) - if ($char === '/') { - # Set the temporary buffer to the empty string. - # Switch to the script data double escape end state. - # Emit a U+002F SOLIDUS character token. - $this->temporaryBuffer = ''; - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE; - yield new CharacterToken('/'); - } - # Anything else - else { - # Reconsume in the script data double escaped state. 
- $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - goto Reconsume; - } - } - - # 13.2.5.31 Script data double escape end state - elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - # "/" (U+002F) - # ">" (U+003E) - if (strspn($char, " />\t\n\x0C")) { - # If the temporary buffer is the string "script", - # then switch to the script data escaped state. - # Otherwise, switch to the script data double escaped state. - # Emit the current input character as a character token. - if ($this->temporaryBuffer === 'script') { - $this->state = self::SCRIPT_DATA_ESCAPED_STATE; - } else { - $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - } - if (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char); - } else { - yield new CharacterToken($char); - } - } - # ASCII upper alpha - # ASCII lower alpha - elseif (ctype_alpha($char)) { - # Uppercase: - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the temporary buffer. - # Emit the current input character as a character token. - # Lowercase: - # Append the current input character to the temporary buffer. - # Emit the current input character as a character token. - - // OPTIMIZATION: Combine upper and lower alpha - // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having - // to loop back through here every single time. - $char = $char.$this->data->consumeWhile(self::CTYPE_ALPHA); - $this->temporaryBuffer .= strtolower($char); - yield new CharacterToken($char); - } - # Anything else - else { - # Reconsume in the script data double escaped state. 
- $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE; - goto Reconsume; - } - } - - # 13.2.5.32 Before attribute name state - elseif ($this->state === self::BEFORE_ATTRIBUTE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Ignore the character. - } - # "/" (U+002F) - # ">" (U+003E) - # EOF - elseif ($char === '/' || $char === '>' || $char === '') { - # Reconsume in the after attribute name state. - $this->state = self::AFTER_ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - # "=" (U+003D) - elseif ($char === '=') { - # This is an unexpected-equals-sign-before-attribute-name parse error. - # Start a new attribute in the current tag token. - # Set that attribute's name to the current input character, - # and its value to the empty string. - # Switch to the attribute name state. - $this->error(ParseError::UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME); - $attribute = new TokenAttr($char, ''); - $this->state = self::ATTRIBUTE_NAME_STATE; - } - # Anything else - else { - # Start a new attribute in the current tag token. - # Set that attribute name and value to the empty string. - # Reconsume in the attribute name state. - $attribute = new TokenAttr('', ''); - $this->state = self::ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - } - - # 13.2.5.33 Attribute name state - elseif ($this->state === self::ATTRIBUTE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - # "/" (U+002F) - # U+003E GREATER-THAN SIGN (>) - # EOF - if (strspn($char, " />\t\n\x0C") || $char === '') { - # Reconsume in the after attribute name state. - $this->keepOrDiscardAttribute($token, $attribute); - $this->state = self::AFTER_ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - # "=" (U+003D) - elseif ($char === '=') { - # Switch to the before attribute value state. 
- $this->keepOrDiscardAttribute($token, $attribute); - $this->state = self::BEFORE_ATTRIBUTE_VALUE_STATE; - } - # ASCII upper alpha - elseif (ctype_upper($char)) { - # Append the lowercase version of the current input character - # (add 0x0020 to the character's code point) to the - # current attribute's name. - - // OPTIMIZATION: - // Consume all characters that are uppercase ASCII letters to prevent - // having to loop back through here every single time. - $attribute->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_UPPER)); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's name. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $attribute->name .= "\u{FFFD}"; - } - # U+0022 QUOTATION MARK (") - # "'" (U+0027) - # "<" (U+003C) - elseif ($char === '"' || $char === "'" || $char === '<') { - # This is an unexpected-character-in-attribute-name parse error. - # Treat it as per the "anything else" entry below. - $this->error(ParseError::UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME, $char); - goto attribute_name_state_anything_else; - } - # Anything else - else { - attribute_name_state_anything_else: - # Append the current input character to the current attribute's name. - $attribute->name .= $char.$this->data->consumeUntil("\t\n\x0c /=>\0\"'<".self::CTYPE_UPPER); - } - } - - # 13.2.5.34 After attribute name state - elseif ($this->state === self::AFTER_ATTRIBUTE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Ignore the character. - } - # U+002F SOLIDUS (/) - elseif ($char === '/') { - # Switch to the self-closing start tag state. - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } - # U+003D EQUALS SIGN (=) - elseif ($char === '=') { - # Switch to the before attribute value state. 
- $this->state = self::BEFORE_ATTRIBUTE_VALUE_STATE; - } - # U+003E GREATER-THAN SIGN (>) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current tag token. - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # Start a new attribute in the current tag token. - # Set that attribute name and value to the empty string. - # Reconsume in the attribute name state. - $attribute = new TokenAttr('', ''); - $this->state = self::ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - } - - # 13.2.5.35 Before attribute value state - elseif ($this->state === self::BEFORE_ATTRIBUTE_VALUE_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Ignore the character. - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # Switch to the attribute value (double-quoted) state. - $this->state = self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # Switch to the attribute value (single-quoted) state. - $this->state = self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-attribute-value parse error. - # Switch to the data state. - # Emit the current tag token. - $this->error(ParseError::MISSING_ATTRIBUTE_VALUE); - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # Anything else - else { - # Reconsume in the attribute value (unquoted) state. 
- $this->state = self::ATTRIBUTE_VALUE_UNQUOTED_STATE; - goto Reconsume; - } - } - - # 13.2.5.36 Attribute value (double-quoted) state - elseif ($this->state === self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { - # Consume the next input character - - # U+0022 QUOTATION MARK (") - if ($char === '"') { - # Switch to the after attribute value (quoted) state. - $this->state = self::AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; - } - # U+0026 AMPERSAND (&) - elseif ($char === '&') { - # Set the return state to the attribute value (double-quoted) state. - # Switch to the character reference state. - - // DEVIATION: Character reference consumption implemented as a function - $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $attribute->value .= "\u{FFFD}"; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the current attribute's value. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $attribute->value .= $char.$this->data->consumeUntil("\"&\0"); - } - } - - # 13.2.5.37 Attribute value (single-quoted) state - elseif ($this->state === self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { - # Consume the next input character - - # U+0027 APOSTROPHE (') - if ($char === "'") { - # Switch to the after attribute value (quoted) state. 
- $this->state = self::AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; - } - # U+0026 AMPERSAND (&) - elseif ($char === '&') { - # Set the return state to the attribute value (single-quoted) state. - # Switch to the character reference state. - - // DEVIATION: Character reference consumption implemented as a function - $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE); - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $attribute->value .= "\u{FFFD}"; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the current attribute's value. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $attribute->value .= $char.$this->data->consumeUntil("'&\0"); - } - } - - - # 13.2.5.38 Attribute value (unquoted) state - elseif ($this->state === self::ATTRIBUTE_VALUE_UNQUOTED_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Switch to the before attribute name state. - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } - # U+0026 AMPERSAND (&) - elseif ($char === '&') { - # Set the return state to the attribute value (unquoted) state. - # Switch to the character reference state. - - // DEVIATION: Character reference consumption implemented as a function - $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_UNQUOTED_STATE); - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. 
Emit the current tag token. - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $attribute->value .= "\u{FFFD}"; - } - # U+0022 QUOTATION MARK (") - # "'" (U+0027) - # "<" (U+003C) - # "=" (U+003D) - # "`" (U+0060) - elseif (strspn($char,"\"'<=`")) { - # This is an unexpected-character-in-unquoted-attribute-value parse error. - # Treat it as per the "anything else" entry below. - $this->error(ParseError::UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE, $char); - goto attribute_value_unquoted_state_anything_else; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - attribute_value_unquoted_state_anything_else: - # Append the current input character to the current attribute's value. - - // OPTIMIZATION: Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $attribute->value .= $char.$this->data->consumeUntil("\t\n\x0c &>\0\"'<=`"); - } - } - - # 13.2.5.39 After attribute value (quoted) state - elseif ($this->state === self::AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, " \t\n\x0C")) { - # Switch to the before attribute name state. - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - } - # "/" (U+002F) - elseif ($char === '/') { - # Switch to the self-closing start tag state. - $this->state = self::SELF_CLOSING_START_TAG_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current tag token. 
- $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-whitespace-between-attributes parse error. - # Reconsume in the before attribute name state. - $this->error(ParseError::MISSING_WHITESPACE_BETWEEN_ATTRIBUTES); - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - } - - # 13.2.5.40 Self-closing start tag state - elseif ($this->state === self::SELF_CLOSING_START_TAG_STATE) { - # Consume the next input character - - # ">" (U+003E) - if ($char === '>') { - # Set the self-closing flag of the current tag token. - # Switch to the data state. - # Emit the current tag token. - $token->selfClosing = true; - $this->state = self::DATA_STATE; - $this->sanitizeTag($token); - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-tag parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_TAG); - yield new EOFToken; - return; - } - # Anything else - else { - # This is an unexpected-solidus-in-tag parse error. - # Reconsume in the before attribute name state. - $this->error(ParseError::UNEXPECTED_SOLIDUS_IN_TAG); - $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; - goto Reconsume; - } - } - - # 13.2.5.44 Bogus comment state - elseif ($this->state === self::BOGUS_COMMENT_STATE) { - # Consume the next input character - - # U+003E GREATER-THAN SIGN (>) - if ($char === '>') { - # Switch to the data state. - # Emit the comment token. - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # Emit the comment. - # Emit an end-of-file token. - yield $token; - yield new EOFToken; - return; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. 
- # Append a U+FFFD REPLACEMENT CHARACTER character to the comment token's data. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->data .= "\u{FFFD}"; - } - # Anything else - else { - # Append the current input character to the comment token's data. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->data .= $char.$this->data->consumeUntil(">\0"); - } - } - - # 13.2.5.42 Markup declaration open state - elseif ($this->state === self::MARKUP_DECLARATION_OPEN_STATE) { - # If the next few characters are: - - # Two U+002D HYPHEN-MINUS characters (-) - if ($this->data->peek(2) === '--') { - # Consume those two characters, - # create a comment token whose data is the empty string, - # and switch to the comment start state. - $this->data->consumeWhile("-", 2); - $token = new CommentToken(''); - $this->state = self::COMMENT_START_STATE; - } - //OPTIMIZATION: Peek seven characters only once - else { - $peek = $this->data->peek(7); - # ASCII case-insensitive match for the word "DOCTYPE" - if (strtoupper($peek) === 'DOCTYPE') { - # Consume those characters and switch to the DOCTYPE state. - $this->data->consumeWhile(self::CTYPE_ALPHA, 7); - $this->state = self::DOCTYPE_STATE; - } - # Case-sensitive match for the string "[CDATA[" - elseif ($peek === '[CDATA[') { - # Consume those characters. - # If there is an adjusted current node and it is not an - # element in the HTML namespace, then switch to the - # CDATA section state. - # Otherwise, this is a cdata-in-html-content parse error. - # Create a comment token whose data is the "[CDATA[" string. - # Switch to the bogus comment state. - $this->data->consumeWhile(self::CTYPE_ALPHA."[", 7); - if ($this->stack->adjustedCurrentNode && ($this->stack->adjustedCurrentNode->namespaceURI ?? 
Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE) { - $this->state = self::CDATA_SECTION_STATE; - } else { - $this->error(ParseError::CDATA_IN_HTML_CONTENT); - $token = new CommentToken('[CDATA['); - $this->state = self::BOGUS_COMMENT_STATE; - } - } - # Anything else - else { - # This is an incorrectly-opened-comment parse error. - # Create a comment token whose data is the empty string. - # Switch to the bogus comment state - # (don't consume anything in the current state). - $this->error(ParseError::INCORRECTLY_OPENED_COMMENT); - $token = new CommentToken(''); - $this->state = self::BOGUS_COMMENT_STATE; - } - } - } - - # 13.2.5.43 Comment start state - elseif ($this->state === self::COMMENT_START_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the comment start dash state. - $this->state = self::COMMENT_START_DASH_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-closing-of-empty-comment parse error. - # Switch to the data state. - # Emit the comment token. - $this->error(ParseError::ABRUPT_CLOSING_OF_EMPTY_COMMENT); - $this->state = self::DATA_STATE; - yield $token; - } - # Anything else - else { - # Reconsume in the comment state. - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.44 Comment start dash state - elseif ($this->state === self::COMMENT_START_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the comment end state. - $this->state = self::COMMENT_END_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-closing-of-empty-comment parse error. - # Switch to the data state. - # Emit the comment token. - $this->error(ParseError::ABRUPT_CLOSING_OF_EMPTY_COMMENT); - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-comment parse error. - # Emit the comment token. - # Emit an end-of-file token. 
- $this->error(ParseError::EOF_IN_COMMENT); - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append a U+002D HYPHEN-MINUS character (-) to the comment token's data. - # Reconsume in the comment state. - $token->data .= '-'; - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.45 Comment state - elseif ($this->state === self::COMMENT_STATE) { - # Consume the next input character - - # "<" (U+003C) - if ($char === '<') { - # Append the current input character to the comment token's data. - # Switch to the comment less-than sign state. - $token->data .= $char; - $this->state = self::COMMENT_LESS_THAN_SIGN_STATE; - } - # "-" (U+002D) - elseif ($char === '-') { - # Switch to the comment end dash state - $this->state = self::COMMENT_END_DASH_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character to the comment token's data. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->data .= "\u{FFFD}"; - } - # EOF - elseif ($char === '') { - # This is an eof-in-comment parse error. - # Emit the comment token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_COMMENT); - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the comment token's data. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->data .= $char.$this->data->consumeUntil("<-\0"); - } - } - - # 13.2.5.46 Comment less-than sign state - elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_STATE) { - # Consume the next input character - - # U+0021 EXCLAMATION MARK (!) - if ($char === '!') { - # Append the current input character to the comment token's data. - # Switch to the comment less-than sign bang state. 
- $token->data .= $char; - $this->state = self::COMMENT_LESS_THAN_SIGN_BANG_STATE; - } - # U+003C LESS-THAN SIGN (<) - elseif ($char ==='<') { - # Append the current input character to the comment token's data. - $token->data .= $char; - } - # Anything else - else { - # Reconsume in the comment state - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.47 Comment less-than sign bang state - elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_STATE) { - # Consume the next input character - - # U+002D HYPHEN-MINUS (-) - if ($char === '-') { - # Switch to the comment less-than sign bang dash state. - $this->state = self::COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE; - } - # Anything else - else { - # Reconsume in the comment state - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.48 Comment less-than sign bang dash state - elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE) { - # Consume the next input character - - # U+002D HYPHEN-MINUS (-) - if ($char === '-') { - # Switch to the comment less-than sign bang dash dash state. - $this->state = self::COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE; - } - # Anything else - else { - # Reconsume in the comment end dash state - $this->state = self::COMMENT_END_DASH_STATE; - goto Reconsume; - } - } - - # 13.2.5.49 Comment less-than sign bang dash dash state - elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE) { - # Consume the next input character - - # U+003E GREATER-THAN SIGN (>) - # EOF - if ($char === '>' || $char === '') { - # Reconsume in the comment end state. - $this->state = self::COMMENT_END_STATE; - goto Reconsume; - } - # Anything else - else { - # This is a nested-comment parse error. - # Reconsume in the comment end state. 
- $this->error(ParseError::NESTED_COMMENT); - $this->state = self::COMMENT_END_STATE; - goto Reconsume; - } - } - - # 13.2.5.50 Comment end dash state - elseif ($this->state === self::COMMENT_END_DASH_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Switch to the comment end state - $this->state = self::COMMENT_END_STATE; - } - # EOF - elseif ($char === '') { - # This is an eof-in-comment parse error. - # Emit the comment token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_COMMENT); - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append a "-" (U+002D) character to the comment token's data. - # Reconsume in the comment state. - $token->data .= '-'; - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.50 Comment end state - elseif ($this->state === self::COMMENT_END_STATE) { - # Consume the next input character - - # ">" (U+003E) - if ($char === '>') { - # Switch to the data state. - # Emit the comment token. - $this->state = self::DATA_STATE; - yield $token; - } - # "!" (U+0021) - elseif ($char === '!') { - # Switch to the comment end bang state. - $this->state = self::COMMENT_END_BANG_STATE; - } - # "-" (U+002D) - elseif ($char === '-') { - # Append a U+002D HYPHEN-MINUS character (-) to the comment token's data. - - // OPTIMIZATION: - // Consume all '-' characters to prevent having to loop back through - // here every single time. - $token->data .= $char.$this->data->consumeWhile('-'); - } - # EOF - elseif ($char === '') { - # This is an eof-in-comment parse error. - # Emit the comment token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_COMMENT); - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append two U+002D HYPHEN-MINUS characters (-) to the comment token's data. - # Reconsume in the comment state. 
- $token->data .= '--'; - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.52 Comment end bang state - elseif ($this->state === self::COMMENT_END_BANG_STATE) { - # Consume the next input character - - # "-" (U+002D) - if ($char === '-') { - # Append two U+002D HYPHEN-MINUS characters (-) - # and a U+0021 EXCLAMATION MARK character (!) - # to the comment token's data. - # Switch to the comment end dash state. - $token->data .= '--!'; - $this->state = self::COMMENT_END_DASH_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an incorrectly-closed-comment parse error. - # Switch to the data state. - # Emit the comment token. - $this->error(ParseError::INCORRECTLY_CLOSED_COMMENT); - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-comment parse error. - # Emit the comment token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_COMMENT); - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append two U+002D HYPHEN-MINUS characters (-) - # and a U+0021 EXCLAMATION MARK character (!) - # to the comment token's data. - # Reconsume in the comment state. - $token->data .= '--!'; - $this->state = self::COMMENT_STATE; - goto Reconsume; - } - } - - # 13.2.5.53 DOCTYPE state - elseif ($this->state === self::DOCTYPE_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Switch to the before DOCTYPE name state. - $this->state = self::BEFORE_DOCTYPE_NAME_STATE; - } - # U+003E GREATER-THAN SIGN (>) - elseif ($char === '>') { - # Reconsume in the before DOCTYPE name state. - $this->state = self::BEFORE_DOCTYPE_NAME_STATE; - goto Reconsume; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Create a new DOCTYPE token. - # Set its force-quirks flag to on. - # Emit the token. - # Emit an end-of-file token. 
- $this->error(ParseError::EOF_IN_DOCTYPE); - $token = new DOCTYPEToken(); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-whitespace-before-doctype-name parse error. - # Reconsume in the before DOCTYPE name state. - $this->error(ParseError::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME); - $this->state = self::BEFORE_DOCTYPE_NAME_STATE; - goto Reconsume; - } - } - - # 13.2.5.54 Before DOCTYPE name state - elseif ($this->state === self::BEFORE_DOCTYPE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character. - } - // See below for ASCII upper alpha - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Create a new DOCTYPE token. - # Set the token's name to a U+FFFD REPLACEMENT CHARACTER character. - # Switch to the DOCTYPE name state. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token = new DOCTYPEToken("\u{FFFD}"); - $this->state = self::DOCTYPE_NAME_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-doctype-name parse error. - # Create a new DOCTYPE token. - # Set its force-quirks flag to on. - # Switch to the data state. - # Emit the token. - $this->error(ParseError::MISSING_DOCTYPE_NAME); - $token = new DOCTYPEToken(); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Create a new DOCTYPE token. - # Set its force-quirks flag to on. - # Emit the token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token = new DOCTYPEToken(); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # ASCII upper alpha - # Anything else - else { - # Create a new DOCTYPE token. 
- # Set the token's name to the current input character. - # Switch to the DOCTYPE name state. - - // OPTIMIZATION: Also handle ASCII upper alpha - // OPTIMIZATION: Consume characters not explicitly handled by the "DOCTYPE name" state - $token = new DOCTYPEToken(strtolower($char.$this->data->consumeUntil("\t\n\x0c >\0"))); - $this->state = self::DOCTYPE_NAME_STATE; - } - } - - # 13.2.5.55 DOCTYPE name state - elseif ($this->state === self::DOCTYPE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Switch to the after DOCTYPE name state. - $this->state = self::AFTER_DOCTYPE_NAME_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - // See below for ASCII upper alpha - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character - # to the current DOCTYPE token's name. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->name .= "\u{FFFD}"; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # ASCII upper alpha - # Anything else - else { - # Append the current input character to the current DOCTYPE token's name. - - // OPTIMIZATION: Also handle ASCII upper alpha - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. 
- $token->name .= strtolower($char.$this->data->consumeUntil("\t\n\x0c >\0")); - } - } - - # 13.2.5.56 After DOCTYPE name state - elseif ($this->state === self::AFTER_DOCTYPE_NAME_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - // OPTIMIZATION: Peek only once; we peek because consuming could alter the order of errors - $peek = strtoupper($char.$this->data->peek(5)); - # If the six characters starting from the current input - # character are an ASCII case-insensitive match for the - # word "PUBLIC", then consume those characters and - # switch to the after DOCTYPE public keyword state. - if($peek === 'PUBLIC') { - $this->data->consumeWhile(self::CTYPE_ALPHA, 5); - $this->state = self::AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE; - } - # Otherwise, if the six characters starting from the current input - # character are an ASCII case-insensitive match for the - # word "SYSTEM", then consume those characters and - # switch to the after DOCTYPE system keyword state. - elseif ($peek === 'SYSTEM') { - $this->data->consumeWhile(self::CTYPE_ALPHA, 5); - $this->state = self::AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE; - } - # Otherwise, this is an - # invalid-character-sequence-after-doctype-name - # parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. 
- else { - $this->error(ParseError::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - } - - # 13.2.5.57 After DOCTYPE public keyword state - elseif ($this->state === self::AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Switch to the before DOCTYPE public identifier state. - $this->state = self::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE; - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # This is a missing-whitespace-after-doctype-public-keyword parse error. - # Set the DOCTYPE token's public identifier to the empty string (not missing), - # then switch to the DOCTYPE public identifier (double-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD); - $token->public = ''; - $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # This is a missing-whitespace-after-doctype-public-keyword parse error. - # Set the DOCTYPE token's public identifier to the empty string (not missing), - # then switch to the DOCTYPE public identifier (single-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD); - $token->public = ''; - $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::MISSING_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. 
- # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.58 Before DOCTYPE public identifier state - elseif ($this->state === self::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character. - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # Set the DOCTYPE token's public identifier to the empty string (not missing), - # then switch to the DOCTYPE public identifier (double-quoted) state. - $token->public = ''; - $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # Set the DOCTYPE token's public identifier to the empty string (not missing), - # then switch to the DOCTYPE public identifier (single-quoted) state. - $token->public = ''; - $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::MISSING_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. 
- # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.59 DOCTYPE public identifier (double-quoted) state - elseif ($this->state === self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) { - # Consume the next input character - - # U+0022 QUOTATION MARK (") - if ($char === '"') { - # Switch to the after DOCTYPE public identifier state. - $this->state = self::AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character - # to the current DOCTYPE token's public identifier. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->public .= "\u{FFFD}"; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the - # current DOCTYPE token's public identifier. 
- - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->public .= $char.$this->data->consumeUntil("\">\0"); - } - } - - # 13.2.5.60 DOCTYPE public identifier (single-quoted) state - elseif ($this->state === self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) { - # Consume the next input character - - # "'" (U+0027) - if ($char === "'") { - # Switch to the after DOCTYPE public identifier state. - $this->state = self::AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character - # to the current DOCTYPE token's public identifier. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->public .= "\u{FFFD}"; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-doctype-public-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the - # current DOCTYPE token's public identifier. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. 
- $token->public .= $char.$this->data->consumeUntil("'>\0"); - } - } - - # 13.2.5.60 After DOCTYPE public identifier state - elseif ($this->state === self::AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Switch to the between DOCTYPE public and system identifiers state. - $this->state = self::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # This is a missing-whitespace-between-doctype-public-and-system-identifiers parse error. - # Set the DOCTYPE token's system identifier to the empty string (not missing), - # then switch to the DOCTYPE system identifier (double-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS); - $this->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # This is a missing-whitespace-between-doctype-public-and-system-identifiers parse error. - # Set the DOCTYPE token's system identifier to the empty string (not missing), - # then switch to the DOCTYPE system identifier (single-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS); - $this->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. 
- $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.62 Between DOCTYPE public and system identifiers state - elseif ($this->state === self::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character. - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # Set the DOCTYPE token's system identifier to the - # empty string (not missing), then switch to the - # DOCTYPE system identifier (double-quoted) state. - $this->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # Set the DOCTYPE token's system identifier to the - # empty string (not missing), then switch to the - # DOCTYPE system identifier (single-quoted) state. - $this->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. 
- $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.63 After DOCTYPE system keyword state - elseif ($this->state === self::AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Switch to the before DOCTYPE system identifier state. - $this->state = self::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE; - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # This is a missing-whitespace-after-doctype-system-keyword parse error. - # Set the DOCTYPE token's system identifier to the empty string (not missing), - # then switch to the DOCTYPE system identifier (double-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD); - $token->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # This is a missing-whitespace-after-doctype-system-keyword parse error. - # Set the DOCTYPE token's system identifier to the empty string (not missing), - # then switch to the DOCTYPE system identifier (single-quoted) state. - $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD); - $token->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. 
- # Emit that DOCTYPE token. - $this->error(ParseError::MISSING_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.64 Before DOCTYPE system identifier state - elseif ($this->state === self::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character. - } - # U+0022 QUOTATION MARK (") - elseif ($char === '"') { - # Set the DOCTYPE token's system identifier to the - # empty string (not missing), then switch to the - # DOCTYPE system identifier (double-quoted) state. - $token->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; - } - # "'" (U+0027) - elseif ($char === "'") { - # Set the DOCTYPE token's system identifier to the - # empty string (not missing), then switch to the - # DOCTYPE system identifier (single-quoted) state. - $token->system = ''; - $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is a missing-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. 
- $this->error(ParseError::MISSING_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is a missing-quote-before-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.64 DOCTYPE system identifier (double-quoted) state - elseif ($this->state === self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) { - # Consume the next input character - - # U+0022 QUOTATION MARK (") - if ($char === '"') { - # Switch to the after DOCTYPE system identifier state. - $this->state = self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character - # to the current DOCTYPE token's system identifier. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->system .= "\u{FFFD}"; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. 
- # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the current DOCTYPE token's system identifier. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->system .= $char.$this->data->consumeUntil("\"\0>"); - } - } - - # 13.2.5.66 DOCTYPE system identifier (single-quoted) state - elseif ($this->state === self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) { - # Consume the next input character - - # "'" (U+0027) - if ($char === "'") { - # Switch to the after DOCTYPE system identifier state. - $this->state = self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Append a U+FFFD REPLACEMENT CHARACTER character - # to the current DOCTYPE token's system identifier. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - $token->system .= "\u{FFFD}"; - } - # ">" (U+003E) - elseif ($char === '>') { - # This is an abrupt-doctype-system-identifier parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Switch to the data state. - # Emit that DOCTYPE token. - $this->error(ParseError::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER); - $token->forceQuirks = true; - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # Append the current input character to the current DOCTYPE token's system identifier. 
- - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time. - $token->system .= $char.$this->data->consumeUntil("'\0>"); - } - } - - # 13.2.5.67 After DOCTYPE system identifier state - elseif ($this->state === self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { - # Consume the next input character - - # "tab" (U+0009) - # "LF" (U+000A) - # "FF" (U+000C) - # U+0020 SPACE - if (strspn($char, "\t\n\x0C ")) { - # Ignore the character - } - # ">" (U+003E) - elseif ($char === '>') { - # Switch to the data state. - # Emit the current DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - # EOF - elseif ($char === '') { - # This is an eof-in-doctype parse error. - # Set the DOCTYPE token's force-quirks flag to on. - # Emit that DOCTYPE token. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_DOCTYPE); - $token->forceQuirks = true; - yield $token; - yield new EOFToken; - return; - } - # Anything else - else { - # This is an unexpected-character-after-doctype-system-identifier parse error. - # Reconsume in the bogus DOCTYPE state. - # (This does not set the DOCTYPE token's force-quirks flag to on.) - $this->error(ParseError::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, $char); - $this->state = self::BOGUS_DOCTYPE_STATE; - goto Reconsume; - } - } - - # 13.2.5.67 Bogus DOCTYPE state - elseif ($this->state === self::BOGUS_DOCTYPE_STATE) { - # Consume the next input character - - # ">" (U+003E) - if ($char === '>') { - # Switch to the data state. - # Emit the DOCTYPE token. - $this->state = self::DATA_STATE; - yield $token; - } - # U+0000 NULL - elseif ($char === "\0") { - # This is an unexpected-null-character parse error. - # Ignore the character. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - } - # EOF - elseif ($char === '') { - # Emit the DOCTYPE token. - # Emit an end-of-file token. 
- yield $token; - yield new EOFToken; - return; - } - # Anything else - # Ignore the character. - } - - # 13.2.5.69 CDATA section state - elseif ($this->state === self::CDATA_SECTION_STATE) { - # Consume the next input character - - # U+005D RIGHT SQUARE BRACKET (]) - if ($char === ']') { - # Switch to the CDATA section bracket state. - $this->state = self::CDATA_SECTION_BRACKET_STATE; - } - # EOF - elseif ($char === '') { - # This is an eof-in-cdata parse error. - # Emit an end-of-file token. - $this->error(ParseError::EOF_IN_CDATA); - yield new EOFToken; - return; - } - # Anything else - else { - # Emit the current input character as a character token. - - // OPTIMIZATION: - // Consume all characters that aren't listed above to prevent having - // to loop back through here every single time; only null characters - // are emitted singly - if ($char === "\0") { - yield new NullCharacterToken($char); - } elseif (strspn($char, Data::WHITESPACE)) { - yield new WhitespaceToken($char.$this->data->consumeWhile(Data::WHITESPACE_SAFE)); - } else { - yield new CharacterToken($char.$this->data->consumeUntil("]\0")); - } - } - } - - # 13.2.5.70 CDATA section bracket state - elseif ($this->state === self::CDATA_SECTION_BRACKET_STATE) { - # Consume the next input character - - # U+005D RIGHT SQUARE BRACKET (]) - if ($char === ']') { - # Switch to the CDATA section end state. - $this->state = self::CDATA_SECTION_END_STATE; - } - # Anything else - else { - # Emit a U+005D RIGHT SQUARE BRACKET character token. - # Reconsume in the CDATA section state. - $this->state = self::CDATA_SECTION_STATE; - yield new CharacterToken(']'); - goto Reconsume; - } - } - - # 13.2.5.71 CDATA section end state - elseif ($this->state === self::CDATA_SECTION_END_STATE) { - # Consume the next input character - - # U+005D RIGHT SQUARE BRACKET (]) - if ($char === ']') { - # Emit a U+005D RIGHT SQUARE BRACKET character token. 
- - // OTPIMIZATION: Consume any additional right square brackets - yield new CharacterToken(']'.$this->data->consumeWhile(']')); - } - # U+003E GREATER-THAN SIGN character - elseif ($char === '>') { - # Switch to the data state. - $this->state = self::DATA_STATE; - } - # Anything else - else { - # Emit two U+005D RIGHT SQUARE BRACKET character tokens. - # Reconsume in the CDATA section state. - $this->state = self::CDATA_SECTION_STATE; - yield new CharacterToken(']]'); - goto Reconsume; - } - } - - # Not a valid state, unimplemented, or implemented elsewhere - else { - throw new Exception(Exception::TOKENIZER_INVALID_STATE, (self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore - } - } - } // @codeCoverageIgnore - - protected function switchToCharacterReferenceState(int $returnState): string { - // This function implements states 72 through 80, - // "Character reference" through "Numeric character reference end" states - $this->state = self::CHARACTER_REFERENCE_STATE; - $charRefCode = 0; - - while (true) { - assert((function() { - $state = self::STATE_NAMES[$this->state] ?? $this->state; - $char = bin2hex($this->data->peek(1)); - $this->debugLog .= " State: $state ($char)\n"; - return true; - })()); - - # 13.2.5.72 Character reference state - if ($this->state === self::CHARACTER_REFERENCE_STATE) { - # Set the temporary buffer to the empty string. - # Append a U+0026 AMPERSAND (&) character to the temporary buffer. - # Consume the next input character. - $this->temporaryBuffer = '&'; - $char = $this->data->consume(); - - # ASCII alphanumeric - if (ctype_alnum($char)) { - # Reconsume in the named character reference state. - $this->state = self::NAMED_CHARACTER_REFERENCE_STATE; - $this->data->unconsume(); - } - # U+0023 NUMBER SIGN (#) - elseif ($char === '#') { - # Append the current input character to the temporary buffer. - # Switch to the numeric character reference state. 
- $this->temporaryBuffer .= $char; - $this->state = self::NUMERIC_CHARACTER_REFERENCE_STATE; - } - # Anything else - else { - # Flush code points consumed as a character reference. - # Reconsume in the return state. - $this->state = $returnState; - $this->data->unconsume(); - return $this->temporaryBuffer; - } - } - - # 13.2.5.73 Named character reference state - elseif ($this->state === self::NAMED_CHARACTER_REFERENCE_STATE) { - # Consume the maximum number of characters possible, - # with the consumed characters matching one of the - # identifiers in the first column of the named character - # references table (in a case-sensitive manner). - - // DEVIATION: - // We consume all possible alphanumeric characters, - // up to the length of the longest in the table - $candidate = $this->data->consumeWhile(self::CTYPE_ALNUM, CharacterReference::LONGEST_NAME); - // Keep a record of the terminating character, which is used later - $next = $this->data->peek(1); - if ($next === ';') { - // consume the following character if it is a proper terminator - $candidate .= $this->data->consume(); - } - // Look for an exact match; if not found look for a prefix match - $match = CharacterReference::NAMES[$candidate] ?? null; - if ($match === null) { - $match = (preg_match(CharacterReference::PREFIX_PATTERN, $candidate, $match)) ? $match[0] : null; - // If a prefix match is found, unconsume to the end of the prefix and look up the entry in the table - if ($match !== null) { - $this->data->unconsume(strlen($candidate) - strlen($match)); - $next = $candidate[strlen($match)]; - $candidate = $match; - $match = CharacterReference::NAMES[$match]; - } - } - - # Append each character to the temporary buffer when it's consumed. 
- $this->temporaryBuffer .= $candidate; - - # If there is a match - if ($match !== null) { - # If the character reference was consumed as part of an attribute, - # and the last character matched is not a U+003B SEMICOLON character (;), - # and the next input character is either a U+003D EQUALS SIGN character (=) - # or an ASCII alphanumeric... - if (in_array($returnState, self::ATTRIBUTE_VALUE_STATE_SET) && $next !== ';' && ($next === '=' || ctype_alnum($next))) { - # ... then, for historical reasons, flush code points consumed - # as a character reference and switch to the return state. - $this->state = $returnState; - return $this->temporaryBuffer; - } - # Otherwise: - else { - # If the last character matched is not a U+003B SEMICOLON character (;), - # then this is a missing-semicolon-after-character-reference parse error. - if ($next !== ';') { - $this->error(ParseError::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE); - } - # Set the temporary buffer to the empty string. - # Append one or two characters corresponding to the - # character reference name (as given by the second - # column of the named character references table) - # to the temporary buffer. - # Flush code points consumed as a character reference. - # Switch to the return state. - - // In other words: return the match - $this->state = $returnState; - return $match; - } - } - # Otherwise: - else { - # Flush code points consumed as a character reference. - # Switch to the ambiguous ampersand state. - - // DEVIATION: We flush only when switching to the return state - $this->state = self::AMBIGUOUS_AMPERSAND_STATE; - // If we consumed a semicolon earlier we need to undo this - if ($next === ';') { - $this->data->unconsume(); - $this->temporaryBuffer = substr($this->temporaryBuffer, 0, -1); - } - } - } - - # 13.2.5.74 Ambiguous ampersand state - elseif ($this->state === self::AMBIGUOUS_AMPERSAND_STATE) { - # Consume the next input character. 
- $char = $this->data->consume(); - - # ASCII alphanumeric - if (ctype_alnum($char)) { - # If the character reference was consumed as part of an attribute, - # then append the current input character to the current attribute's value. - # Otherwise, emit the current input character as a character token. - - // DEVIATION: We just continue to buffer characters until it's time to return - $this->temporaryBuffer .= $char.$this->data->consumeWhile(self::CTYPE_ALNUM); - } - # U+003B SEMICOLON (;) - elseif ($char === ';') { - # This is an unknown-named-character-reference parse error. - # Reconsume in the return state. - $this->data->unconsume(); - $this->error(ParseError::UNKNOWN_NAMED_CHARACTER_REFERENCE, $this->temporaryBuffer.';'); - $this->state = $returnState; - return $this->temporaryBuffer; - } - # Anything else - else { - # Reconsume in the return state. - $this->state = $returnState; - $this->data->unconsume(); - return $this->temporaryBuffer; - } - } - - # 13.2.5.75 Numeric character reference state - elseif ($this->state === self::NUMERIC_CHARACTER_REFERENCE_STATE) { - # Set the character reference code to zero (0). - $charRefCode = 0; - # Consume the next input character. - $char = $this->data->consume(); - - # U+0078 LATIN SMALL LETTER X - #U+0058 LATIN CAPITAL LETTER X - if ($char === 'x' || $char === 'X') { - # Append the current input character to the temporary buffer. - # Switch to the hexadecimal character reference start state. - $this->temporaryBuffer .= $char; - $this->state = self::HEXADECIMAL_CHARACTER_REFERENCE_START_STATE; - } - # Anything else - else { - # Reconsume in the decimal character reference start state. - $this->state = self::DECIMAL_CHARACTER_REFERENCE_START_STATE; - $this->data->unconsume(); - } - } - - # 13.2.5.76 Hexadecimal character reference start state - elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_START_STATE) { - # Consume the next input character. 
- $char = $this->data->consume(); - - # ASCII hex digit - if (ctype_xdigit($char)) { - # Reconsume in the hexadecimal character reference state. - - // OPTIMIZATION: - // Just consume the digits here - $charRefCode = hexdec($char.$this->data->consumeWhile(self::CTYPE_HEX)); - $this->state = self::HEXADECIMAL_CHARACTER_REFERENCE_STATE; - } - # Anything else - else { - # This is an absence-of-digits-in-numeric-character-reference parse error. - # Flush code points consumed as a character reference. - # Reconsume in the return state. - $this->data->unconsume(); - $this->error(ParseError::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE); - $this->state = $returnState; - return $this->temporaryBuffer; - } - } - - # 13.2.5.77 Decimal character reference start state - elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_START_STATE) { - # Consume the next input character. - $char = $this->data->consume(); - - # ASCII digit - if (ctype_digit($char)) { - # Reconsume in the decimal character reference state. - - // OPTIMIZATION: - // Just consume the digits here - $charRefCode = (int) ($char.$this->data->consumeWhile(self::CTYPE_NUM)); - $this->state = self::DECIMAL_CHARACTER_REFERENCE_STATE; - } - # Anything else - else { - # This is an absence-of-digits-in-numeric-character-reference parse error. - # Flush code points consumed as a character reference. - # Reconsume in the return state. - $this->data->unconsume(); - $this->error(ParseError::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE); - $this->state = $returnState; - return $this->temporaryBuffer; - } - } - - # 13.2.5.78 Hexadecimal character reference state - elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_STATE) { - # Consume the next input character. - $char = $this->data->consume(); - - # ASCII digit - # ASCII upper hex digit - # ASCII lower hex digit - if (ctype_xdigit($char)) { - # Multiply the character reference code by 16. 
- # Add a numeric version of the current input - # character to the character reference code. - - // OPTIMIZATION: Combine all digit types - // NOTE: This branch should never be reached - $charRefCode = ($charRefCode * 16) + hexdec($char); // @codeCoverageIgnore - } - # U+003B SEMICOLON - elseif ($char === ';') { - # Switch to the numeric character reference end state. - $this->state = self::NUMERIC_CHARACTER_REFERENCE_END_STATE; - } - # Anything else - else { - # This is a missing-semicolon-after-character-reference parse error. - # Reconsume in the numeric character reference end state. - $this->data->unconsume(); - $this->error(ParseError::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE); - $this->state = self::NUMERIC_CHARACTER_REFERENCE_END_STATE; - } - } - - # 13.2.5.79 Decimal character reference state - elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_STATE) { - # Consume the next input character. - $char = $this->data->consume(); - - # ASCII digit - if (ctype_digit($char)) { - # Multiply the character reference code by 10. - # Add a numeric version of the current input - # character to the character reference code. - - // OPTIMIZATION: Combine all digit types - // NOTE: This branch should never be reached - $charRefCode = ($charRefCode * 10) + ((int) ($char)); // @codeCoverageIgnore - } - # U+003B SEMICOLON - elseif ($char === ';') { - # Switch to the numeric character reference end state. - $this->state = self::NUMERIC_CHARACTER_REFERENCE_END_STATE; - } - # Anything else - else { - # This is a missing-semicolon-after-character-reference parse error. - # Reconsume in the numeric character reference end state. 
- $this->data->unconsume(); - $this->error(ParseError::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE); - $this->state = self::NUMERIC_CHARACTER_REFERENCE_END_STATE; - } - } - - # 13.2.5.80 Numeric character reference end state - elseif ($this->state === self::NUMERIC_CHARACTER_REFERENCE_END_STATE) { - # Check the character reference code: - - # If the number is 0x00, then this is a null-character-reference parse error. - # Set the character reference code to 0xFFFD. - if ($charRefCode === 0) { - $this->error(ParseError::NULL_CHARACTER_REFERENCE); - $charRefCode = 0xFFFD; - } - # If the number is greater than 0x10FFFF, then this is a - # character-reference-outside-unicode-range parse error. - # Set the character reference code to 0xFFFD. - elseif ($charRefCode > 0x10FFFF) { - $this->error(ParseError::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE); - $charRefCode = 0xFFFD; - } - # If the number is a surrogate, then this is a - # surrogate-character-reference parse error. - # Set the character reference code to 0xFFFD. - elseif ($charRefCode >= 0xD800 && $charRefCode <= 0xDFFF) { - $this->error(ParseError::SURROGATE_CHARACTER_REFERENCE); - $charRefCode = 0xFFFD; - } - # If the number is a noncharacter, then this is a - # noncharacter-character-reference parse error. - elseif (($charRefCode >= 0xFDD0 && $charRefCode <= 0xFDEF) || ($charRefCode % 0x10000 & 0xFFFE) === 0xFFFE) { - $this->error(ParseError::NONCHARACTER_CHARACTER_REFERENCE); - } - # If the number is 0x0D, or a control that's not ASCII whitespace, then - # this is a control-character-reference parse error. - # If the number is one of the numbers in the first column of the following - # table, then find the row with that number in the first column, and set - # the character reference code to the number in the second column of that row. 
- elseif (($charRefCode < 0x20 && !in_array($charRefCode, [0x9, 0xA, 0xC])) || ($charRefCode >= 0x7F && $charRefCode <= 0x9F)) { - // NOTE: Table elided - $this->error(ParseError::CONTROL_CHARACTER_REFERENCE); - $charRefCode = CharacterReference::C1_TABLE[$charRefCode] ?? $charRefCode; - } - $this->temporaryBuffer = UTF8::encode($charRefCode); - $this->state = $returnState; - return $this->temporaryBuffer; - } - - # Not a valid state, unimplemented, or implemented elsewhere - else { - throw new Exception(Exception::TOKENIZER_INVALID_CHARACTER_REFERENCE_STATE, (self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore - } - } - } // @codeCoverageIgnore -} diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php deleted file mode 100644 index 48c80aa..0000000 --- a/lib/TreeBuilder.php +++ /dev/null @@ -1,4307 +0,0 @@ - "Initial", - self::BEFORE_HTML_MODE => "Before html", - self::BEFORE_HEAD_MODE => "Before head", - self::IN_HEAD_MODE => "In head", - self::IN_HEAD_NOSCRIPT_MODE => "In head noscript", - self::AFTER_HEAD_MODE => "After head", - self::IN_BODY_MODE => "In body", - self::TEXT_MODE => "Text", - self::IN_TABLE_MODE => "In table", - self::IN_TABLE_TEXT_MODE => "In table text", - self::IN_CAPTION_MODE => "In caption", - self::IN_COLUMN_GROUP_MODE => "In column group", - self::IN_TABLE_BODY_MODE => "In table body", - self::IN_ROW_MODE => "In row", - self::IN_CELL_MODE => "In cell", - self::IN_SELECT_MODE => "In select", - self::IN_SELECT_IN_TABLE_MODE => "In select in table", - self::IN_TEMPLATE_MODE => "In template mode", - self::AFTER_BODY_MODE => "After body", - self::IN_FRAMESET_MODE => "In frameset", - self::AFTER_FRAMESET_MODE => "After frameset", - self::AFTER_AFTER_BODY_MODE => "After after body", - self::AFTER_AFTER_FRAMESET_MODE => "After after frameset", - ]; - protected const SVG_TAG_NAME_MAP = [ - 'altglyph' => 'altGlyph', - 'altglyphdef' => 'altGlyphDef', - 'altglyphitem' => 'altGlyphItem', - 'animatecolor' => 'animateColor', - 
'animatemotion' => 'animateMotion', - 'animatetransform' => 'animateTransform', - 'clippath' => 'clipPath', - 'feblend' => 'feBlend', - 'fecolormatrix' => 'feColorMatrix', - 'fecomponenttransfer' => 'feComponentTransfer', - 'fecomposite' => 'feComposite', - 'feconvolvematrix' => 'feConvolveMatrix', - 'fediffuselighting' => 'feDiffuseLighting', - 'fedisplacementmap' => 'feDisplacementMap', - 'fedistantlight' => 'feDistantLight', - 'feflood' => 'feFlood', - 'fefunca' => 'feFuncA', - 'fefuncb' => 'feFuncB', - 'fefuncg' => 'feFuncG', - 'fefuncr' => 'feFuncR', - 'fegaussianblur' => 'feGaussianBlur', - 'feimage' => 'feImage', - 'femerge' => 'feMerge', - 'femergenode' => 'feMergeNode', - 'femorphology' => 'feMorphology', - 'feoffset' => 'feOffset', - 'fepointlight' => 'fePointLight', - 'fespecularlighting' => 'feSpecularLighting', - 'fespotlight' => 'feSpotLight', - 'fetile' => 'feTile', - 'feturbulence' => 'feTurbulence', - 'foreignobject' => 'foreignObject', - 'glyphref' => 'glyphRef', - 'lineargradient' => 'linearGradient', - 'radialgradient' => 'radialGradient', - 'textpath' => 'textPath', - ]; - protected const SVG_ATTR_NAME_MAP = [ - 'attributename' => 'attributeName', - 'attributetype' => 'attributeType', - 'basefrequency' => 'baseFrequency', - 'baseprofile' => 'baseProfile', - 'calcmode' => 'calcMode', - 'clippathunits' => 'clipPathUnits', - 'diffuseconstant' => 'diffuseConstant', - 'edgemode' => 'edgeMode', - 'filterunits' => 'filterUnits', - 'glyphref' => 'glyphRef', - 'gradienttransform' => 'gradientTransform', - 'gradientunits' => 'gradientUnits', - 'kernelmatrix' => 'kernelMatrix', - 'kernelunitlength' => 'kernelUnitLength', - 'keypoints' => 'keyPoints', - 'keysplines' => 'keySplines', - 'keytimes' => 'keyTimes', - 'lengthadjust' => 'lengthAdjust', - 'limitingconeangle' => 'limitingConeAngle', - 'markerheight' => 'markerHeight', - 'markerunits' => 'markerUnits', - 'markerwidth' => 'markerWidth', - 'maskcontentunits' => 'maskContentUnits', - 'maskunits' => 
'maskUnits', - 'numoctaves' => 'numOctaves', - 'pathlength' => 'pathLength', - 'patterncontentunits' => 'patternContentUnits', - 'patterntransform' => 'patternTransform', - 'patternunits' => 'patternUnits', - 'pointsatx' => 'pointsAtX', - 'pointsaty' => 'pointsAtY', - 'pointsatz' => 'pointsAtZ', - 'preservealpha' => 'preserveAlpha', - 'preserveaspectratio' => 'preserveAspectRatio', - 'primitiveunits' => 'primitiveUnits', - 'refx' => 'refX', - 'refy' => 'refY', - 'repeatcount' => 'repeatCount', - 'repeatdur' => 'repeatDur', - 'requiredextensions' => 'requiredExtensions', - 'requiredfeatures' => 'requiredFeatures', - 'specularconstant' => 'specularConstant', - 'specularexponent' => 'specularExponent', - 'spreadmethod' => 'spreadMethod', - 'startoffset' => 'startOffset', - 'stddeviation' => 'stdDeviation', - 'stitchtiles' => 'stitchTiles', - 'surfacescale' => 'surfaceScale', - 'systemlanguage' => 'systemLanguage', - 'tablevalues' => 'tableValues', - 'targetx' => 'targetX', - 'targety' => 'targetY', - 'textlength' => 'textLength', - 'viewbox' => 'viewBox', - 'viewtarget' => 'viewTarget', - 'xchannelselector' => 'xChannelSelector', - 'ychannelselector' => 'yChannelSelector', - 'zoomandpan' => 'zoomAndPan', - ]; - protected const FOREIGN_ATTRIBUTE_NAMESPACE_MAP = [ - 'xlink:actuate' => Parser::XLINK_NAMESPACE, - 'xlink:arcrole' => Parser::XLINK_NAMESPACE, - 'xlink:href' => Parser::XLINK_NAMESPACE, - 'xlink:role' => Parser::XLINK_NAMESPACE, - 'xlink:show' => Parser::XLINK_NAMESPACE, - 'xlink:title' => Parser::XLINK_NAMESPACE, - 'xlink:type' => Parser::XLINK_NAMESPACE, - 'xml:id' => Parser::XML_NAMESPACE, // DEVIATION: We support xml:id simply because we can - 'xml:lang' => Parser::XML_NAMESPACE, - 'xml:space' => Parser::XML_NAMESPACE, - 'xmlns' => Parser::XMLNS_NAMESPACE, - 'xmlns:xlink' => Parser::XMLNS_NAMESPACE, - ]; - # The following elements have varying levels of special parsing rules: HTML’s - # address, applet, area, article, aside, base, basefont, bgsound, 
blockquote, - # body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl, - # dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1, - # h2, h3, h4, h5, h6, head, header, hgroup, hr, html, iframe, img, input, - # keygen, li, link, listing, main, marquee, menu, meta, nav, noembed, noframes, - # noscript, object, ol, p, param, plaintext, pre, script, section, select, - # source, style, summary, table, tbody, td, template, textarea, tfoot, th, - # thead, title, tr, track, ul, wbr, xmp; MathML mi, MathML mo, MathML mn, - # MathML ms, MathML mtext, and MathML annotation-xml; and SVG foreignObject, - # SVG desc, and SVG title. - protected const SPECIAL_ELEMENTS = [ - Parser::HTML_NAMESPACE => ['address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'iframe', 'img', 'input', 'keygen', 'li', 'link', 'listing', 'main', 'marquee', 'menu', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'], - Parser::MATHML_NAMESPACE => ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'], - Parser::SVG_NAMESPACE => ['foreignObject', 'desc', 'title'], - ]; - protected const FRAGMENT_CONTEXT_TOKENIZER_STATES = [ - Parser::HTML_NAMESPACE => [ - 'title' => Tokenizer::RCDATA_STATE, - 'textarea' => Tokenizer::RCDATA_STATE, - 'style' => Tokenizer::RAWTEXT_STATE, - 'xmp' => Tokenizer::RAWTEXT_STATE, - 'iframe' => Tokenizer::RAWTEXT_STATE, - 'noembed' => Tokenizer::RAWTEXT_STATE, - 'noframes' => Tokenizer::RAWTEXT_STATE, - 'script' => 
Tokenizer::SCRIPT_DATA_STATE, - 'noscript' => Tokenizer::DATA_STATE, // NOTE: If ever this implementation were scripted, this would need special handling - 'plaintext' => Tokenizer::PLAINTEXT_STATE, - ], - ]; - protected const APPROPRIATE_INSERTION_MODES = [ - "tr" => self::IN_ROW_MODE, - "tbody" => self::IN_TABLE_BODY_MODE, - "thead" => self::IN_TABLE_BODY_MODE, - "tfoot" => self::IN_TABLE_BODY_MODE, - "caption" => self::IN_CAPTION_MODE, - "colgroup" => self::IN_COLUMN_GROUP_MODE, - "table" => self::IN_TABLE_MODE, - "body" => self::IN_BODY_MODE, - "frameset" => self::IN_FRAMESET_MODE, - ]; - - public function __construct(\DOMDocument $dom, Data $data, Tokenizer $tokenizer, \Generator $tokenList, ParseError $errorHandler, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, ?\DOMElement $fragmentContext = null, ?bool $fragmentQuirks = null) { - if ($dom->hasChildNodes() || $dom->doctype) { - throw new Exception(Exception::TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT); - } elseif (!in_array($fragmentQuirks ?? Parser::NO_QUIRKS_MODE, [Parser::NO_QUIRKS_MODE, Parser::LIMITED_QUIRKS_MODE, Parser::QUIRKS_MODE])) { - throw new Exception(Exception::INVALID_QUIRKS_MODE); - } - $this->DOM = $dom; - $this->fragmentContext = $fragmentContext; - $this->stack = $stack; - $this->templateInsertionModes = $templateInsertionModes; - $this->tokenizer = $tokenizer; - $this->data = $data; - $this->errorHandler = $errorHandler; - $this->activeFormattingElementsList = new ActiveFormattingElementsList; - $this->tokenList = $tokenList; - - # Parsing HTML fragments - if ($this->fragmentContext) { - # Create a new Document node, and mark it as being an HTML document. - // Already done. - # If the node document of the context element is in quirks mode, then - # let the Document be in quirks mode. Otherwise, the node document of - # the context element is in limited-quirks mode, then let the Document - # be in limited-quirks mode. 
Otherwise, leave the Document in no-quirks mode. - $this->quirksMode = $fragmentQuirks ?? $this->quirksMode; - # Create a new HTML parser, and associate it with the just created Document node. - // Already done. - # Set the state of the HTML parser's tokenization stage as follows, switching on the context element: - $this->tokenizer->state = (self::FRAGMENT_CONTEXT_TOKENIZER_STATES[$fragmentContext->namespaceURI ?? Parser::HTML_NAMESPACE] ?? [])[$fragmentContext->nodeName] ?? Tokenizer::DATA_STATE; - # Let root be a new html element with no attributes. - # Append the element root to the Document node created above. - $dom->appendChild($dom->createElement("html")); - # Set up the parser's stack of open elements so that it contains just the single element root. - $this->stack[] = $dom->documentElement; - # If the context element is a template element, push "in template" onto the stack of - # template insertion modes so that it is the new current template insertion mode. - if ($fragmentContext->nodeName === "template" && $fragmentContext->namespaceURI === null) { - $this->templateInsertionModes[] = self::IN_TEMPLATE_MODE; - } - # Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. - # Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point. - // Are these even necessary? - # Reset the parser's insertion mode appropriately. - $this->resetInsertionMode(); - # Set the parser's form element pointer to the nearest node to the context element - # that is a form element (going straight up the ancestor chain, and including the - # element itself, if it is a form element), if any. (If there is no such form element, - # the form element pointer keeps its initial value, null.) 
- $node = $fragmentContext; - do { - if ($node->nodeName === "form" && $fragmentContext->namespaceURI === null) { - $this->formElement = $node; - break; - } - } while ($node = $node->parentNode); - # Place the input into the input stream for the HTML parser just created. - # The encoding confidence is irrelevant. - // Already done. - # Start the parser and let it run until it has consumed all the characters just inserted into the input stream. - // Handled by emitToken() - } - } - - public function constructTree(): void { - foreach ($this->tokenList as $token) { - assert((function() use ($token) { - $this->debugLog .= "EMITTED: ".constant(get_class($token)."::NAME")."\n"; - return true; - })()); - assert($token instanceof CharacterToken || $token instanceof CommentToken || $token instanceof TagToken || $token instanceof DOCTYPEToken || $token instanceof EOFToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - $iterations = 0; - $insertionMode = $this->insertionMode; - - // If element name coercison has occurred at some earlier point, - // we must coerce all end tag names to match mangled start tags - if ($token instanceof EndTagToken && $this->mangledElements) { - $token->name = $this->coerceName($token->name); - } - - # 13.2.6 Tree construction - # - # As each token is emitted from the tokenizer, the user agent must follow the - # appropriate steps from the following list, known as the tree construction dispatcher: - if ( - # If the stack of open elements is empty - !$this->stack->currentNode - # If the adjusted current node is an element in the HTML namespace - // DEVIATION: For the purposes of this implementation the HTML namespace is null - // rather than the XHTML namespace - || $this->stack->adjustedCurrentNodeNamespace === null - # If the adjusted current node is a MathML text integration - # point and the token is a start tag whose tag name is - # neither "mglyph" nor "malignmark" - # If the adjusted current node is a 
MathML text integration - # point and the token is a character token - || ($this->isMathMLTextIntegrationPoint($this->stack->adjustedCurrentNode) && (($token instanceof StartTagToken && ($token->name !== 'mglyph' && $token->name !== 'malignmark') || $token instanceof CharacterToken))) - # If the adjusted current node is an annotation-xml element - # in the MathML namespace and the token is a start tag - # whose tag name is "svg" - || ($this->stack->adjustedCurrentNodeNamespace === Parser::MATHML_NAMESPACE && $this->stack->adjustedCurrentNodeName === 'annotation-xml' && $token instanceof StartTagToken && $token->name === 'svg') - # If the adjusted current node is an HTML integration point - # and the token is a start tag - # If the adjusted current node is an HTML integration point - # and the token is a character token - || ($this->isHTMLIntegrationPoint($this->stack->adjustedCurrentNode) && ($token instanceof StartTagToken || $token instanceof CharacterToken)) - # If the token is an end-of-file token - || $token instanceof EOFToken - ) { - # Process the token according to the rules given in the section - # corresponding to the current insertion mode in HTML content. - ProcessToken: - assert($iterations++ < 50, new LoopException("Probable infinite loop detected in HTML content handling (inner reprocessing)")); - - assert((function() use ($insertionMode) { - $mode = self::INSERTION_MODE_NAMES[$insertionMode] ?? $insertionMode; - $this->debugLog .= " Mode: $mode (".(string) $this->stack.")\n"; - return true; - })()); - - # 13.2.6.4. The rules for parsing tokens in HTML content - // OPTIMIZATION: Evaluation the "in body" mode first is - // faster for typical documents - # 13.2.6.4.7. The "in body" insertion mode - if ($insertionMode === self::IN_BODY_MODE) { - # A start tag... - if ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === 'html') { - # Parse error. 
- $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If there is a template element on the stack of open elements, then ignore the - # token. - if ($this->stack->find('template') === -1) { - # Otherwise, for each attribute on the token, check to see if the attribute is - # already present on the top element of the stack of open elements. If it is - # not, add the attribute and its corresponding value to that element. - $top = $this->stack[0]; - foreach ($token->attributes as $a) { - // If attribute name coercison has occurred at some earlier point, - // we must coerce all attributes on html and body start tags in - // case they are relocated to existing elements - $attrName = $this->mangledAttributes ? $this->coerceName($a->name) : $a->name; - if (!$top->hasAttributeNS(null, $attrName)) { - $this->elementSetAttribute($top, null, $attrName, $a->value); - } - } - } - } - # A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", - # "meta", "noframes", "script", "style", "template", "title" - elseif (in_array($token->name, ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'])) { - # Process the token using the rules for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "body" - elseif ($token->name === 'body') { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If the second element on the stack of open elements is not a body element, if - # the stack of open elements has only one node on it, or if there is a template - # element on the stack of open elements, then ignore the token. 
(fragment case) - if (!(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body' || $this->stack->find('template') > -1)) { - # Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on - # the token, check to see if the attribute is already present on the body - # element (the second element) on the stack of open elements, and if it is not, - # add the attribute and its corresponding value to that element. - $this->framesetOk = false; - $body = $this->stack[1]; - foreach ($token->attributes as $a) { - // If attribute name coercison has occurred at some earlier point, - // we must coerce all attributes on html and body start tags in - // case they are relocated to existing elements - $attrName = $this->mangledAttributes ? $this->coerceName($a->name) : $a->name; - if (!$body->hasAttributeNS(null, $attrName)) { - $this->elementSetAttribute($body, null, $attrName, $a->value); - } - } - } - } - # A start tag whose tag name is "frameset" - elseif ($token->name === 'frameset') { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, 'frameset'); - - # If the stack of open elements has only one node on it, or if the second - # element on the stack of open elements is not a body element, then ignore the - # token. (fragment case) - # If the frameset-ok flag is set to "not ok", ignore the token. - if (!(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body' || $this->framesetOk === false)) { - # Otherwise, run the following steps: - # - # 1. Remove the second element on the stack of open elements from its parent - # node, if it has one. - $second = $this->stack[1]; - if ($second->parentNode) { - $second->parentNode->removeChild($second); - } - # 2. Pop all the nodes from the bottom of the stack of open elements, from the - # current node up to, but not including, the root html element. - for ($i = count($this->stack) - 1; $i > 0; $i--) { - $this->stack->pop(); - } - # 3. Insert an HTML element for the token. 
- $this->insertStartTagToken($token); - # 4. Switch the insertion mode to "in frameset". - $this->insertionMode = self::IN_FRAMESET_MODE; - } - } - # A start tag whose tag name is one of: "address", "article", "aside", - # "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset", - # "figcaption", "figure", "footer", "header", "hgroup", "menu", "main", "nav", "ol", "p", - # "section", "summary", "ul" - elseif (in_array($token->name, ['address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'menu', 'nav', 'ol', 'p', 'section', 'summary', 'ul'])) { - # If the stack of open elements has a p element in button scope, then close a p - # element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" - elseif (in_array($token->name, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) { - # If the stack of open elements has a p element in button scope, then close a p - # element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # If the current node is an HTML element whose tag name is one of "h1", "h2", - # "h3", "h4", "h5", or "h6", then this is a parse error; pop the current node - # off the stack of open elements. - if ($this->stack->currentNodeNamespace === null && (in_array($this->stack->currentNodeName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']))) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - $this->stack->pop(); - } - # Insert an HTML element for the token. 
- $this->insertStartTagToken($token); - } - # A start tag whose tag name is one of: "pre", "listing" - elseif ($token->name === 'pre' || $token->name === 'listing') { - # If the stack of open elements has a p element in button scope, then close a p - # element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # If the next token is a U+000A LINE FEED (LF) character token, then ignore that - # token and move on to the next one. (Newlines at the start of pre blocks are - # ignored as an authoring convenience.) - $this->tokenList->next(); - $nextToken = $this->tokenList->current(); - if ($nextToken instanceof CharacterToken) { - // Character tokens in this implementation can have more than one character in - // them. - if (strlen($nextToken->data) === 1 && $nextToken->data === "\n") { - continue; - } elseif (strpos($nextToken->data, "\n") === 0) { - $nextToken->data = substr($nextToken->data, 1); - } - } - // Process the next token - $token = $nextToken; - goto ProcessToken; - } - # A start tag whose tag name is "form" - elseif ($token->name === 'form') { - # If the form element pointer is not null, and there is no template element on - # the stack of open elements, then this is a parse error; ignore the token. - $templateInStack = ($this->stack->find('template') > -1); - if ($this->formElement && !$templateInStack) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Otherwise: - else { - # If the stack of open elements has a p element in button scope, then close a p - # element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # Insert an HTML element for the token, and, if there is no template element on - # the stack of open elements, set the form element pointer to point to the - # element created. 
- $form = $this->insertStartTagToken($token); - if (!$templateInStack) { - $this->formElement = $form; - } - } - } - # A start tag whose tag name is "li" - elseif ($token->name === 'li') { - # 1. Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # 2. Initialize node to be the current node (the bottommost node of the stack). - # 3. Loop: If node is an li element, then run these substeps: - foreach ($this->stack as $node) { - $nodeName = $node->nodeName; - if ($nodeName === 'li') { - # 1. Generate implied end tags, except for li elements. - $this->stack->generateImpliedEndTags("li"); - # 2. If the current node is not an li element, then this is a parse error. - if ($this->stack->currentNodeName !== 'li') { - $this->error(ParseError::UNEXPECTED_START_TAG, $nodeName); - } - # 3. Pop elements from the stack of open elements until an li element has been - # popped from the stack. - $this->stack->popUntil('li'); - # 4. Jump to the step labeled Done below. - break; - } - # 4. If node is in the special category, but is not an address, div, or p - # element, then jump to the step labeled Done below. - if (!in_array($nodeName, ['address', 'div', 'p']) && $this->isElementSpecial($node)) { - break; - } - # 5. Otherwise, set node to the previous entry in the stack of open elements and - # return to the step labeled Loop. - // The loop handles that. - } - # 6. Done: If the stack of open elements has a p element in button scope, then - # close a p element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # 7. Finally, insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is one of: "dd", "dt" - elseif ($token->name === 'dd' || $token->name === 'dt') { - # 1. Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # 2. Initialize node to be the current node (the bottommost node of the stack). 
- foreach ($this->stack as $node) { - $nodeName = $node->nodeName; - // Combining these two sets of instructions as they're identical except for the - // element name. - # 3. Loop: If node is a dd element, then run these substeps: - # 4. If node is a dt element, then run these substeps: - if ($nodeName === 'dd' || $nodeName === 'dt') { - # 1. Generate implied end tags, except for dd or dt elements. - $this->stack->generateImpliedEndTags('dd', 'dt'); - # 2. If the current node is not a dd or dt element, then this is a parse error. - if ($this->stack->currentNodeName !== $nodeName) { - $this->error(ParseError::UNEXPECTED_START_TAG, $nodeName); - } - # 3. Pop elements from the stack of open elements until a dd or dt element has been - # popped from the stack. - $this->stack->popUntil('dd', 'dt'); - # 4. Jump to the step labeled Done below. - break; - } - # 5. If node is in the special category, but is not an address, div, or p - # element, then jump to the step labeled Done below. - if (!in_array($nodeName, ['address', 'div', 'p']) && $this->isElementSpecial($node)) { - break; - } - # 6. Otherwise, set node to the previous entry in the stack of open elements and - # return to the step labeled Loop. - // The loop handles that. - } - # 7. Done: If the stack of open elements has a p element in button scope, then - # close a p element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # 8. Finally, insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is "plaintext" - elseif ($token->name === 'plaintext') { - # If the stack of open elements has a p element in button scope, then close a p - # element. - if ($this->stack->hasElementInButtonScope('p')) { - $this->closePElement($token); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Switch the tokenizer to the §8.2.4.5 PLAINTEXT state. 
- $this->tokenizer->state = Tokenizer::PLAINTEXT_STATE; - } - # A start tag whose tag name is "button" - elseif ($token->name === 'button') { - # 1. If the stack of open elements has a button element in scope, then run these - # substeps: - if ($this->stack->hasElementInScope('button')) { - # 1. Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # 2. Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # 3. Pop elements from the stack of open elements until a button element has - # been popped from the stack. - $this->stack->popUntil('button'); - } - # 2. Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # 3. Insert an HTML element for the token. - $this->insertStartTagToken($token); - # 4. Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - } - # A start tag whose tag name is "a" - elseif ($token->name === "a") { - # If the list of active formatting elements contains an a element between the end - # of the list and the last marker on the list (or the start of the list if there - # is no marker on the list), then this is a parse error; - if (($pos = $this->activeFormattingElementsList->findToMarker("a")) > -1) { - $this->error(ParseError::UNEXPECTED_START_TAG_IMPLIES_END_TAG, $token->name); - $element = $this->activeFormattingElementsList[$pos]['element']; - # ... run the adoption agency algorithm for the token, - $this->adopt($token); - # ... then remove that element from the list of active formatting elements and the - # stack of open elements if the adoption agency algorithm didn't already remove it - # (it might not have if the element is not in table scope). - $this->activeFormattingElementsList->removeSame($element); - $this->stack->removeSame($element); - } - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. 
- $element = $this->insertStartTagToken($token); - # Push onto the list of active formatting elements that element. - $this->activeFormattingElementsList->insert($token, $element); - } - # A start tag whose tag name is one of: "b", "big", "code", - # "em", "font", "i", "s", "small", "strike", - # "strong", "tt", "u" - elseif (in_array($token->name, ["b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u"])) { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - $element = $this->insertStartTagToken($token); - # Push onto the list of active formatting elements that element. - $this->activeFormattingElementsList->insert($token, $element); - } - # A start tag whose tag name is "nobr" - elseif ($token->name === "nobr") { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # If the stack of open elements has a nobr element in scope, then this is a parse error; - if($this->stack->hasElementInScope("nobr")) { - $this->error(ParseError::UNEXPECTED_START_TAG_IMPLIES_END_TAG, $token->name); - # ... run the adoption agency algorithm for the token, - $this->adopt($token); - # ... then once again reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - } - # Insert an HTML element for the token. - $element = $this->insertStartTagToken($token); - # Push onto the list of active formatting elements that element. - $this->activeFormattingElementsList->insert($token, $element); - } - # A start tag whose tag name is one of: "applet", "marquee", "object" - elseif (in_array($token->name, ["applet", "marquee", "object"])) { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. 
- $this->insertStartTagToken($token); - # Insert a marker at the end of the list of active formatting elements. - $this->activeFormattingElementsList->insertMarker(); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - } - # A start tag whose tag name is "table" - elseif ($token->name === "table") { - # If the Document is not set to quirks mode, and the stack of open elements has a p element in button scope, then close a p element. - if ($this->quirksMode !== Parser::QUIRKS_MODE && $this->stack->hasElementInButtonScope("p")) { - $this->closePElement($token); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Switch the insertion mode to "in table". - $this->insertionMode = self::IN_TABLE_MODE; - } - # A start tag whose tag name is one of: "area", "br", - # "embed", "img", "keygen", "wbr" - elseif (in_array($token->name, ["area", "br", "embed", "img", "keygen", "wbr"])) { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - # Immediately pop the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - } - # A start tag whose tag name is "input" - elseif ($token->name === "input") { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - # Immediately pop the current node off the stack of open elements. - $element = $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. 
- $token->selfClosingAcknowledged = true; - # If the token does not have an attribute with the name "type", - # or if it does, but that attribute's value is not an ASCII - # case-insensitive match for the string "hidden", then: - # set the frameset-ok flag to "not ok". - // DEVIATION: check the element instead as this is simpler - if ($element->getAttribute("type") !== "hidden") { - $this->framesetOk = false; - } - } - # A start tag whose tag name is one of: "param", "source", "track" - elseif (in_array($token->name, ["param", "source", "track"])) { - # Insert an HTML element for the token. Immediately pop the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - } - # A start tag whose tag name is "hr" - elseif ($token->name === "hr") { - # If the stack of open elements has a p element in button scope, then close a p element. - if ($this->stack->hasElementInButtonScope("p")) { - $this->closePElement($token); - } - # Insert an HTML element for the token. - # Immediately pop the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - } - # A start tag whose tag name is "image" - elseif ($token->name === "image") { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG_ALIAS, $token->name, "img"); - # Change the token's tag name to "img" and reprocess it. (Don't ask.) - $token->name = "img"; - goto ProcessToken; - } - # A start tag whose tag name is "textarea" - elseif ($token->name === "textarea") { - # Run these steps: - # Insert an HTML element for the token. 
- $this->insertStartTagToken($token); - # If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.) - # Switch the tokenizer to the RCDATA state. - $this->tokenizer->state = Tokenizer::RCDATA_STATE; - $this->tokenList->next(); - $nextToken = $this->tokenList->current(); - if ($nextToken instanceof CharacterToken) { - // Character tokens in this implementation can have more than one character in - // them. - if (strlen($nextToken->data) === 1 && $nextToken->data === "\n") { - continue; - } elseif (strpos($nextToken->data, "\n") === 0) { - $nextToken->data = substr($nextToken->data, 1); - } - } - # Let the original insertion mode be the current insertion mode. - $this->originalInsertionMode = $this->insertionMode; - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Switch the insertion mode to "text". - $insertionMode = $this->insertionMode = self::TEXT_MODE; - // Process the next token - $token = $nextToken; - goto ProcessToken; - } - # A start tag whose tag name is "xmp" - elseif ($token->name === "xmp") { - # If the stack of open elements has a p element in button scope, then close a p element. - if ($this->stack->hasElementInButtonScope("p")) { - $this->closePElement($token); - } - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Follow the generic raw text element parsing algorithm. - $this->parseGenericRawText($token); - } - # A start tag whose tag name is "iframe" - elseif ($token->name === "iframe") { - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Follow the generic raw text element parsing algorithm. 
- $this->parseGenericRawText($token); - } - # A start tag whose tag name is "noembed" - # A start tag whose tag name is "noscript", if the scripting flag is enabled - // DEVIATION: The scripting flag is always disabled - elseif ($token->name === "noembed") { - # Follow the generic raw text element parsing algorithm. - $this->parseGenericRawText($token); - } - # A start tag whose tag name is "select" - elseif ($token->name === "select") { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # If the insertion mode is one of "in table", "in caption", - # "in table body", "in row", or "in cell", then switch - # the insertion mode to "in select in table". - if (in_array($this->insertionMode, [ - self::IN_TABLE_MODE, - self::IN_CAPTION_MODE, - self::IN_TABLE_BODY_MODE, - self::IN_ROW_MODE, - self::IN_CELL_MODE, - ])) { - $this->insertionMode = self::IN_SELECT_IN_TABLE_MODE; - } - # Otherwise, switch the insertion mode to "in select". - else { - $this->insertionMode = self::IN_SELECT_MODE; - } - } - # A start tag whose tag name is one of: "optgroup", "option" - elseif ($token->name === "optgroup" || $token->name === "option") { - # If the current node is an option element, then pop the current node off the stack of open elements. - if ($this->stack->currentNodeName === "option") { - $this->stack->pop(); - } - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is one of: "rb", "rtc" - elseif ($token->name === "rb" || $token->name === "rtc") { - # If the stack of open elements has a ruby element in scope, then generate implied end tags. 
- if ($this->stack->hasElementInScope("ruby")) { - $this->stack->generateImpliedEndTags(); - # If the current node is not now a ruby element, this is a parse error. - if ($this->stack->currentNodeName !== "ruby") { - $this->error(ParseError::UNEXPECTED_PARENT, $token->name, $this->stack->currentNodeName); - } - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is one of: "rp", "rt" - elseif ($token->name == "rp" || $token->name === "rt") { - # If the stack of open elements has a ruby element in scope, - # then generate implied end tags, except for rtc elements. - if ($this->stack->hasElementInScope("ruby")) { - $this->stack->generateImpliedEndTags("rtc"); - # If the current node is not now a rtc element or a ruby element, this is a parse error. - if (!in_array($this->stack->currentNodeName, ["rtc", "ruby"])) { - $this->error(ParseError::UNEXPECTED_PARENT, $token->name, $this->stack->currentNodeName); - } - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is "math" - elseif ($token->name === "math") { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.) - # Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.) - foreach ($token->attributes as $a) { - if ($a->name === 'definitionurl') { - $a->name = 'definitionURL'; - } - $a->namespace = self::FOREIGN_ATTRIBUTE_NAMESPACE_MAP[$a->name] ?? null; - } - # Insert a foreign element for the token, in the MathML namespace. - $this->insertStartTagToken($token, null, Parser::MATHML_NAMESPACE); - # If the token has its self-closing flag set, pop the current node off the stack of open elements and acknowledge the token's self-closing flag. 
- if ($token->selfClosing) { - $this->stack->pop(); - $token->selfClosingAcknowledged = true; - } - } - # A start tag whose tag name is "svg" - elseif ($token->name === "svg") { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.) - # Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.) - foreach ($token->attributes as $a) { - $a->name = self::SVG_ATTR_NAME_MAP[$a->name] ?? $a->name; - $a->namespace = self::FOREIGN_ATTRIBUTE_NAMESPACE_MAP[$a->name] ?? null; - } - # Insert a foreign element for the token, in the SVG namespace. - $this->insertStartTagToken($token, null, Parser::SVG_NAMESPACE); - # If the token has its self-closing flag set, pop the current node off the stack of open elements and acknowledge the token's self-closing flag. - if ($token->selfClosing) { - $this->stack->pop(); - $token->selfClosingAcknowledged = true; - } - } - # A start tag whose tag name is one of: "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" - elseif (in_array($token->name, ["caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr"])) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Any other start tag - else { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - } - # An end tag... - elseif ($token instanceof EndTagToken) { - # An end tag whose tag name is "template" - if ($token->name === 'template') { - # Process the token using the rules for the "in head" insertion mode. 
- $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # An end tag whose tag name is "body" - # An end tag whose tag name is "html" - elseif ($token->name === 'body' || $token->name === 'html') { - # If the stack of open elements does not have a body element in scope, this is a - # parse error; ignore the token. - if (!$this->stack->hasElementInScope('body')) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, if there is a node in the stack of open elements that is not either - # a dd element, a dt element, an li element, an optgroup element, an option - # element, a p element, an rb element, an rp element, an rt element, an rtc - # element, a tbody element, a td element, a tfoot element, a th element, a thead - # element, a tr element, the body element, or the html element, then this is a - # parse error. - else { - if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Switch the insertion mode to "after body". - $insertionMode = $this->insertionMode = self::AFTER_BODY_MODE; - // The only thing different between body and html here is that when processing - // an html end tag the token is reprocessed. - if ($token->name === 'html') { - # Reprocess the token. 
- goto ProcessToken; - } - } - } - # An end tag whose tag name is one of: "address", "article", "aside", - # "blockquote", "button", "center", "details", "dialog", "dir", "div", "dl", - # "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", - # "main", "menu", "nav", "ol", "pre", "section", "summary", "ul" - elseif (in_array($token->name, ['address', 'article', 'aside', 'blockquote', 'button', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'listing', 'main', 'menu', 'nav', 'ol', 'pre', 'section', 'summary', 'ul'])) { - # If the stack of open elements does not have an element in scope that is an - # HTML element with the same tag name as that of the token, then this is a parse - # error; ignore the token. - if (!$this->stack->hasElementInScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # 1. Generate implied end tags. - $this->stack->generateImpliedEndTags(); - - # 2. If the current node is not an HTML element with the same tag name as that - # of the token, then this is a parse error. - if ($this->stack->currentNodeName !== $token->name) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - - # 3. Pop elements from the stack of open elements until an HTML element with the - # same tag name as the token has been popped from the stack. - $this->stack->popUntil($token->name); - } - } - # An end tag whose tag name is "form" - elseif ($token->name === 'form') { - # If there is no template element on the stack of open elements, then run these - # substeps: - if ($this->stack->find('template') === -1) { - # 1. Let node be the element that the form element pointer is set to, - # or null if it is not set to an element. - $node = $this->formElement; - # 2. Set the form element pointer to null. - $this->formElement = null; - # 3. 
If node is null or if the stack of open elements does not have node in - # scope, then this is a parse error; return and ignore the token. - if (!$node || !$this->stack->hasElementInScope($node)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - continue; - } - # 4. Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # 5. If the current node is not node, then this is a parse error. - if (!$this->stack->currentNode->isSameNode($node)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # 6. Remove node from the stack of open elements - $this->stack->removeSame($node); - } - # If there is a template element on the stack of open elements, then run these - # substeps instead: - else { - # 1. If the stack of open elements does not have a form element in scope, then - # this is a parse error; return and ignore the token. - if ($this->stack->hasElementInScope('form')) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - continue; - } - # 2. Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # 3. If the current node is not a form element, then this is a parse error. - if (!$this->stack->currentNodeName !== 'form') { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # 4. Pop elements from the stack of open elements until a form element has been - # popped from the stack. - $this->stack->popUntil('form'); - } - } - # An end tag whose tag name is "p" - elseif ($token->name === "p") { - # If the stack of open elements does not have a p element in button scope, then this is a parse error; - if (!$this->stack->hasElementInButtonScope("p")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - # insert an HTML element for a "p" start tag token with no attributes. - $this->insertStartTagToken(new StartTagToken("p")); - } - # Close a p element. 
- $this->closePElement($token); - } - # An end tag whose tag name is "li" - elseif ($token->name === "li") { - # If the stack of open elements does not have an li element in - # list item scope, then this is a parse error; ignore the token. - if (!$this->stack->hasElementInListItemScope("li")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # Generate implied end tags, except for li elements. - $this->stack->generateImpliedEndTags("li"); - # If the current node is not an li element, then this is a parse error. - if ($this->stack->currentNodeName !== "li" || $this->stack->currentNodeNamespace !== null) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements until an li element has been popped from the stack. - $this->stack->popUntil("li"); - } - } - # An end tag whose tag name is one of: "dd", "dt" - elseif ($token->name === "dd" || $token->name === "dt") { - # If the stack of open elements does not have an element in - # scope that is an HTML element with the same tag name as that of - # the token, then this is a parse error; ignore the token. - if (!$this->stack->hasElementInScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # Generate implied end tags, except for HTML elements - # with the same tag name as the token. - $this->stack->generateImpliedEndTags($token->name); - # If the current node is not an HTML element with the same - # tag name as that of the token, then this is a parse error. - if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements until an HTML - # element with the same tag name as the token has been - # popped from the stack. 
- $this->stack->popUntil($token->name); - } - } - # An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" - elseif (in_array($token->name, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) { - # If the stack of open elements does not have an element in scope - # that is an HTML element and whose tag name is one of "h1", "h2", - # "h3", "h4", "h5", or "h6", then this is a parse error; ignore the token. - if (!$this->stack->hasElementInScope("h1", "h2", "h3", "h4", "h5", "h6")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # If the current node is not an HTML element with the same tag name - # as that of the token, then this is a parse error. - if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements until an HTML - # element whose tag name is one of "h1", "h2", "h3", "h4", - # "h5", or "h6" has been popped from the stack. - $this->stack->popUntil("h1", "h2", "h3", "h4", "h5", "h6"); - } - } - # An end tag whose tag name is "sarcasm" - # Take a deep breath, then act as described in - # the "any other end tag" entry below. - # An end tag whose tag name is one of: "a", "b", "big", - # "code", "em", "font", "i", "nobr", "s", "small", - # "strike", "strong", "tt", "u" - elseif (in_array($token->name, ["a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"])) { - # Run the adoption agency algorithm for the token. 
- // OPTIMIZATION: Only run the adoption agency if it's necessary - if ( - $token->name == $this->stack->currentNodeName - && $this->stack->currentNodeNamespace == null - && count($this->activeFormattingElementsList) - && $this->activeFormattingElementsList->top()['element']->isSameNode($this->stack->currentNode) - ) { - $this->stack->pop(); - $this->activeFormattingElementsList->pop(); - } else { - $this->adopt($token); - } - } - # An end tag token whose tag name is one of: "applet", "marquee", "object" - elseif (in_array($token->name, ["applet", "marquee", "object"])) { - # If the stack of open elements does not have an element in scope that - # is an HTML element with the same tag name as that of the token, then - # this is a parse error; ignore the token. - if (!$this->stack->hasElementInScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # If the current node is not an HTML element with the same tag - # name as that of the token, then this is a parse error. - if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements until an HTML - # element with the same tag name as the token has been - # popped from the stack. - $this->stack->popUntil($token->name); - # Clear the list of active formatting elements up to the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - } - } - # An end tag whose tag name is "br" - elseif ($token->name === "br") { - # Parse error. Drop the attributes from the token, and act as described - # in the next entry; i.e. act as if this was a "br" start tag token with - # no attributes, rather than the end tag token that it actually is. 
- $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - $token = new StartTagToken("br"); - goto ProcessToken; - } - # Any other end tag - else { - // NOTE: This logic is reproduced in the adoption agency below. - // Changes here should be mirrored there, and vice versa - # Run these steps: - # Initialize node to be the current node (the bottommost node of the stack). - foreach ($this->stack as $node) { - # Loop: If node is an HTML element with the same tag name as the token, then: - if ($node->nodeName === $token->name && $node->namespaceURI === null) { - # Generate implied end tags, except for HTML elements with the same tag name as the token. - $this->stack->generateImpliedEndTags($token->name); - # If node is not the current node, then this is a parse error. - if (!$node->isSameNode($this->stack->currentNode)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop all the nodes from the current node up to node, including node, then stop these steps. - $this->stack->popUntilSame($node); - continue 2; - } - # Otherwise, if node is in the special category, then - # this is a parse error; ignore the token, and return. - elseif ($this->isElementSpecial($node)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - continue 2; - } - # Set node to the previous entry in the stack of open elements. - # Return to the step labeled loop. - } - } - } - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - elseif ($token instanceof WhitespaceToken) { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert the token’s character. - $this->insertCharacterToken($token); - } - # A character token that is U+0000 NULL - elseif ($token instanceof NullCharacterToken) { - # Parse error. 
Ignore the token - // DEVIATION: the parse error is already reported by the tokenizer; - // this is probably an oversight in the specification, so we don't - // report it a second time - } - # Any other character token - elseif ($token instanceof CharacterToken) { - # Reconstruct the active formatting elements, if any. - $this->reconstructActiveFormattingElements(); - # Insert the token’s character. - $this->insertCharacterToken($token); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # If the stack of template insertion modes is not empty, then process the token using the rules for the "in template" insertion mode. - if (count($this->templateInsertionModes) !== 0) { - $insertionMode = self::IN_TEMPLATE_MODE; - goto ProcessToken; - } - - # Otherwise, follow these steps: - # 1. If there is a node in the stack of open elements that is not either a dd - # element, a dt element, an li element, an optgroup element, an option element, - # a p element, an rb element, an rp element, an rt element, an rtc element, a - # tbody element, a td element, a tfoot element, a th element, a thead element, a - # tr element, the body element, or the html element, then this is a parse error. - if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) { - $this->error(ParseError::UNEXPECTED_EOF); - } - - # 2. Stop parsing. - return; - } - } - # 13.2.6.4.1. 
The "initial" insertion mode - elseif ($insertionMode === self::INITIAL_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - // OPTIMIZATION: Will check for multiple space characters at once as character - // tokens can contain more than one character. - if ($token instanceof WhitespaceToken) { - # Ignore the token. - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment as the last child of the Document object. - // DEVIATION: PHP's DOM does not allow comments as children of the document - // and silently drops them, so this is actually a no-op - $this->insertCommentToken($token, $this->DOM); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # If the DOCTYPE token's name is not "html", or the token's public identifier is - # not missing, or the token's system identifier is neither missing nor - # "about:legacy-compat", then there is a parse error. - if ($token->name !== 'html' || $token->public !== null || !($token->system === null || $token->system === 'about:legacy-compat')) { - $this->error(ParseError::UNKNOWN_DOCTYPE); - } - - # Append a DocumentType node to the Document node, with the name attribute set - # to the name given in the DOCTYPE token, or the empty string if the name was - # missing; the publicId attribute set to the public identifier given in the - # DOCTYPE token, or the empty string if the public identifier was missing; the - # systemId attribute set to the system identifier given in the DOCTYPE token, or - # the empty string if the system identifier was missing; and the other - # attributes specific to DocumentType objects set to null and empty lists as - # appropriate. Associate the DocumentType node with the Document object so that - # it is returned as the value of the doctype attribute of the Document object. 
- $this->DOM->appendChild($this->DOM->implementation->createDocumentType($token->name ?? ' ', $token->public ?? '', $token->system ?? '')); - - - # Then, if the document is not an iframe srcdoc document, and the DOCTYPE token - # matches one of the conditions in the following list, then set the Document to - # quirks mode: - // DEVIATION: This implementation does not render, so there is no nested - // browsing contexts to consider. - $public = strtolower($token->public ?? ''); - $system = strtolower($token->system ?? ''); - if ($token->forceQuirks === true - || $token->name !== 'html' - || $public === '-//w3o//dtd w3 html strict 3.0//en//' - || $public === '-/w3c/dtd html 4.0 transitional/en' - || $public === 'html' - || $system === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' - || strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0 - || strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0 - || strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0 - || strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0 - || strpos($public, '-//advasoft ltd//dtd html 3.0 aswedit + extensions//') === 0 - || strpos($public, '-//ietf//dtd html 2.0 level 1//') === 0 - || strpos($public, '-//ietf//dtd html 2.0 level 2//') === 0 - || strpos($public, '-//ietf//dtd html 2.0 strict level 1//') === 0 - || strpos($public, '-//ietf//dtd html 2.0 strict level 2//') === 0 - || strpos($public, '-//ietf//dtd html 2.0 strict//') === 0 - || strpos($public, '-//ietf//dtd html 2.0//') === 0 - || strpos($public, '-//ietf//dtd html 2.1e//') === 0 - || strpos($public, '-//ietf//dtd html 3.0//') === 0 - || strpos($public, '-//ietf//dtd html 3.2 final//') === 0 - || strpos($public, '-//ietf//dtd html 3.2//') === 0 - || strpos($public, '-//ietf//dtd html 3//') === 0 - || strpos($public, '-//ietf//dtd html level 0//') === 0 - || strpos($public, '-//ietf//dtd html level 1//') === 0 - || strpos($public, '-//ietf//dtd html level 2//') 
=== 0 - || strpos($public, '-//ietf//dtd html level 3//') === 0 - || strpos($public, '-//ietf//dtd html strict level 0//') === 0 - || strpos($public, '-//ietf//dtd html strict level 1//') === 0 - || strpos($public, '-//ietf//dtd html strict level 2//') === 0 - || strpos($public, '-//ietf//dtd html strict level 3//') === 0 - || strpos($public, '-//ietf//dtd html strict//') === 0 - || strpos($public, '-//ietf//dtd html//') === 0 - || strpos($public, '-//metrius//dtd metrius presentational//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 2.0 html strict//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 2.0 html//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 2.0 tables//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 3.0 html strict//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 3.0 html//') === 0 - || strpos($public, '-//microsoft//dtd internet explorer 3.0 tables//') === 0 - || strpos($public, '-//netscape comm. corp.//dtd html//') === 0 - || strpos($public, '-//netscape comm. 
corp.//dtd strict html//') === 0 - || strpos($public, '-//o\'reilly and associates//dtd html 2.0//') === 0 - || strpos($public, '-//o\'reilly and associates//dtd html extended 1.0//') === 0 - || strpos($public, '-//o\'reilly and associates//dtd html extended relaxed 1.0//') === 0 - || strpos($public, '-//sq//dtd html 2.0 hotmetal + extensions//') === 0 - || strpos($public, '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//') === 0 - || strpos($public, '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//') === 0 - || strpos($public, '-//spyglass//dtd html 2.0 extended//') === 0 - || strpos($public, '-//sun microsystems corp.//dtd hotjava html//') === 0 - || strpos($public, '-//sun microsystems corp.//dtd hotjava strict html//') === 0 - || strpos($public, '-//w3c//dtd html 3 1995-03-24//') === 0 - || strpos($public, '-//w3c//dtd html 3.2 draft//') === 0 - || strpos($public, '-//w3c//dtd html 3.2 final//') === 0 - || strpos($public, '-//w3c//dtd html 3.2//') === 0 - || strpos($public, '-//w3c//dtd html 3.2s draft//') === 0 - || strpos($public, '-//w3c//dtd html 4.0 frameset//') === 0 - || strpos($public, '-//w3c//dtd html 4.0 transitional//') === 0 - || strpos($public, '-//w3c//dtd html experimental 19960712//') === 0 - || strpos($public, '-//w3c//dtd html experimental 970421//') === 0 - || strpos($public, '-//w3c//dtd w3 html//') === 0 - || strpos($public, '-//w3o//dtd w3 html 3.0//') === 0 - || strpos($public, '-//webtechs//dtd mozilla html 2.0//') === 0 - || strpos($public, '-//webtechs//dtd mozilla html//') === 0 - || ($token->system === null && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0) - || ($token->system === null && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0) - ) { - $this->quirksMode = Parser::QUIRKS_MODE; - } - # Otherwise, if the document is not an iframe srcdoc document, and the DOCTYPE - # token matches one of the conditions in the following list, then set the - # Document 
to limited-quirks mode: - // DEVIATION: There is no iframe srcdoc document because there are no nested - // browsing contexts in this implementation. - elseif ( - strpos($public, '-//w3c//dtd xhtml 1.0 frameset//') === 0 - || strpos($public, '-//w3c//dtd xhtml 1.0 transitional//') === 0 - || ($token->system !== null && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0) - || ($token->system !== null && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0) - ) { - $this->quirksMode = Parser::LIMITED_QUIRKS_MODE; - } - # The system identifier and public identifier strings must be compared to the - # values given in the lists above in an ASCII case-insensitive manner. A system - # identifier whose value is the empty string is not considered missing for the - # purposes of the conditions above. - - # Then, switch the insertion mode to "before html". - $this->insertionMode = self::BEFORE_HTML_MODE; - } - # Anything else - else { - # If the document is not an iframe srcdoc document, then this is a parse error; - # set the Document to quirks mode. - // DEVIATION: There is no iframe srcdoc document because there are no nested - // browsing contexts in this implementation. - assert($token instanceof CharacterToken || $token instanceof TagToken || $token instanceof EOFToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_CHARS); - } elseif ($token instanceof EOFToken) { - $this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_EOF); - } - - $this->quirksMode = Parser::QUIRKS_MODE; - - # In any case, switch the insertion mode to "before html", then reprocess the - # token. 
- $insertionMode = $this->insertionMode = self::BEFORE_HTML_MODE; - goto ProcessToken; - }; - } - # 13.2.6.4.2. The "before html" insertion mode - elseif ($insertionMode === self::BEFORE_HTML_MODE) { - # A DOCTYPE token - if ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment as the last child of the Document object. - $this->insertCommentToken($token, $this->DOM); - } - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - // OPTIMIZATION: Will check for multiple space characters at once as character - // tokens can contain more than one character. - elseif ($token instanceof WhitespaceToken) { - # Ignore the token. - } - # A start tag whose tag name is "html" - elseif ($token instanceof StartTagToken && $token->name === 'html') { - # Create an element for the token in the HTML namespace, with the Document as - # the intended parent. Append it to the Document object. Put this element in the - # stack of open elements. - $this->insertStartTagToken($token, $this->DOM); - - # Switch the insertion mode to "before head". - $this->insertionMode = self::BEFORE_HEAD_MODE; - } - # An end tag whose tag name is one of: "head", "body", "html", "br" - # Act as described in the "anything else" entry below. - # Any other end tag - elseif ($token instanceof EndTagToken && $token->name !== 'head' && $token->name !== 'body' && $token->name !== 'html' && $token->name !== 'br') { - # Parse error. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Anything else - else { - # Create an html element whose node document is the Document object. Append it - # to the Document object. Put this element in the stack of open elements. 
- $element = $this->DOM->createElement('html'); - $this->DOM->appendChild($element); - $this->stack[] = $element; - - # Switch the insertion mode to "before head", then reprocess the token. - $insertionMode = $this->insertionMode = self::BEFORE_HEAD_MODE; - goto ProcessToken; - } - - # The document element can end up being removed from the Document object, e.g., - # by scripts; nothing in particular happens in such cases, content continues - # being appended to the nodes as described in the next section. - // Good to know. There's no scripting in this implementation, though. - } - # 13.2.6.4.3. The "before head" insertion mode - elseif ($insertionMode === self::BEFORE_HEAD_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - // OPTIMIZATION: Will check for multiple space characters at once as character - // tokens can contain more than one character. - if ($token instanceof WhitespaceToken) { - # Ignore the token. - } - # A comment token - elseif ($token instanceof CommentToken) { - # insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag whose tag name is "html" - elseif ($token instanceof StartTagToken && $token->name === 'html') { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "head" - elseif ($token instanceof StartTagToken && $token->name === 'head') { - # Insert an HTML element for the token. - $element = $this->insertStartTagToken($token); - # Set the head element pointer to the newly created head element. - $this->headElement = $element; - # Switch the insertion mode to "in head". 
- $insertionMode = $this->insertionMode = self::IN_HEAD_MODE; - } - # An end tag whose tag name is one of: "head", "body", "html", "br" - # Act as described in the "anything else" entry below. - # Any other end tag - elseif ($token instanceof EndTagToken && $token->name !== 'head' && $token->name !== 'body' && $token->name !== 'html' && $token->name !== 'br') { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Anything else - else { - # Insert an HTML element for a "head" start tag token with no attributes. - $element = $this->insertStartTagToken(new StartTagToken('head')); - # Set the head element pointer to the newly created head element. - $this->headElement = $element; - # Switch the insertion mode to "in head". - $insertionMode = $this->insertionMode = self::IN_HEAD_MODE; - # Reprocess the current token. - goto ProcessToken; - } - } - # 13.2.6.4.4. The "in head" insertion mode - elseif ($insertionMode === self::IN_HEAD_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - // OPTIMIZATION: Will check for multiple space characters at once as character - // tokens can contain more than one character. - if ($token instanceof WhitespaceToken) { - # Insert the character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === 'html') { - # Process the token using the rules for the "in body" insertion mode. 
- $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is one of: "base", "basefont", "bgsound", "link" - elseif (in_array($token->name, ['base', 'basefont', 'bgsound', 'link'])) { - # Insert an HTML element for the token. - # Immediately pop the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token’s *self-closing flag*, if it is set. - $token->selfClosingAcknowledged = true; - } - # A start tag whose tag name is "meta" - elseif ($token->name === 'meta') { - # Insert an HTML element for the token. - # Immediately pop the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token’s *self-closing flag*, if it is set. - $token->selfClosingAcknowledged = true; - - # If the element has a charset attribute, and getting an encoding from its value - # results in an encoding, and the confidence is currently tentative, then change - # the encoding to the resulting encoding. - # Otherwise, if the element has an http-equiv attribute whose value is an ASCII - # case-insensitive match for the string "Content-Type", and the element has a - # content attribute, and applying the algorithm for extracting a character - # encoding from a meta element to that attribute’s value returns an encoding, - # and the confidence is currently tentative, then change the encoding to the - # extracted encoding. - // DEVIATION: FIXME: This implementation does not support changing the encoding mid-stream - } - # A start tag whose tag name is "title" - elseif ($token->name === 'title') { - # Follow the generic RCDATA element parsing algorithm. 
- $this->parseGenericRCDATA($token); - } - # A start tag whose tag name is "noscript", if the scripting flag is enabled - # A start tag whose tag name is one of: "noframes", "style" - // DEVIATION: There is no scripting in this implementation, so the scripting - // flag is always disabled. - elseif ($token->name === 'noframes' || $token->name === 'style') { - # Follow the generic raw text element parsing algorithm. - $this->parseGenericRawText($token); - } - # A start tag whose tag name is "noscript", if the scripting flag is disabled - // DEVIATION: There is no scripting in this implementation, so the scripting - // flag is always disabled. - elseif ($token->name === 'noscript') { - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Switch the insertion mode to "in head noscript". - $this->insertionMode = self::IN_HEAD_NOSCRIPT_MODE; - } - # A start tag whose tag name is "script" - elseif ($token->name === 'script') { - # Run these steps: - - # 1. Let the adjusted insertion location be the appropriate place for inserting - # a node. - # 2. Create an element for the token in the HTML namespace, with the intended - # parent being the element in which the adjusted insertion location finds - # itself. - // DEVIATION: Because there is no scripting in this implementation, there is no - // need to get the adjusted insertion location as the intended parent isn't used - // when determining anything; Parser::createAndInsertElement will get the - // adjusted insertion location anyway. - $this->insertStartTagToken($token); - - # 3. Mark the element as being "parser-inserted" and unset the element’s - # "non-blocking" flag. - # 4. Mark the element as being "parser-inserted" and unset the element’s - # "non-blocking" flag. - // DEVIATION: No scripting. - # 5. Insert the newly created element at the adjusted insertion location. - // Done. - # 6. Push the element onto the stack of open elements so that it is the new - # current node. 
- // The element insertion algorithm has it do this already... - # 7. Switch the tokenizer to the script data state. - $this->tokenizer->state = Tokenizer::SCRIPT_DATA_STATE; - # 8. Let the original insertion mode be the current insertion mode. - $this->originalInsertionMode = $this->insertionMode; - # 9. Switch the insertion mode to "text". - $this->insertionMode = self::TEXT_MODE; - } - # A start tag whose tag name is "template" - elseif ($token->name === 'template') { - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Insert a marker at the end of the list of active formatting elements. - $this->activeFormattingElementsList->insertMarker(); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Switch the insertion mode to "in template". - $this->insertionMode = self::IN_TEMPLATE_MODE; - # Push "in template" onto the stack of template insertion modes so that it is - # the new current template insertion mode. - $this->templateInsertionModes[] = self::IN_TEMPLATE_MODE; - } - # A start tag whose tag name is "head" - elseif ($token->name === 'head') { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Any other start tag - else { - # Act as described in the "anything else" entry below. - - # Pop the current node (which will be the head element) off - # the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to "after head". - $insertionMode = $this->insertionMode = self::AFTER_HEAD_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # And end tag... - elseif ($token instanceof EndTagToken) { - # An end tag whose tag name is "head" - if ($token->name === 'head') { - # Pop the current node (which will be the head element) off - # the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to "after head". 
- $this->insertionMode = self::AFTER_HEAD_MODE; - } - # An end tag whose tag name is one of: "body", "html", "br" - elseif (in_array($token->name, ['body', 'html', 'br'])) { - # Act as described in the "anything else" entry below. - - # Pop the current node (which will be the head element) off - # the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to "after head". - $insertionMode = $this->insertionMode = self::AFTER_HEAD_MODE; - # Reprocess the token. - goto ProcessToken; - } - # An end tag whose tag name is "template" - elseif ($token->name === 'template') { - # If there is no template element on the stack of open elements, then this is a - # parse error; ignore the token. - if ($this->stack->find('template') === -1) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, run these steps: - else { - # 1. Generate all implied end tags thoroughly. - $this->stack->generateImpliedEndTagsThoroughly(); - # 2. If the current node is not a template element, then this is a parse error. - if ($this->stack->currentNodeName !== 'template') { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # 3. Pop elements from the stack of open elements until a template element has been popped from the stack. - $this->stack->popUntil('template'); - # 4. Clear the list of active formatting elements up to the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # 5. Pop the current template insertion mode off the stack of template insertion modes. - $this->templateInsertionModes->pop(); - # 6. Reset the insertion mode appropriately. - $this->resetInsertionMode(); - } - } - # Any other end tag - else { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # Anything else - else { - # Pop the current node (which will be the head element) off the stack of open - # elements. - $this->stack->pop(); - # Switch the insertion mode to "after head". 
- $insertionMode = $this->insertionMode = self::AFTER_HEAD_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # 13.2.6.4.5. The "in head noscript" insertion mode - elseif ($insertionMode === self::IN_HEAD_NOSCRIPT_MODE) { - # DOCTYPE token - if ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === 'html') { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is one of: "basefont", "bgsound", "link", "meta", - # "noframes", "style" - elseif (in_array($token->name, ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style'])){ - # Process the token using the rules for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # A start tag whose tag name is one of: "head", "noscript" - elseif ($token->name === 'head' || $token->name === 'noscript') { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Any other start tag - else { - # Act as described in the "anything else" entry below. - - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # Pop the current node (which will be a noscript element) from the stack of open - # elements; the new current node will be a head element. - $this->stack->pop(); - # Switch the insertion mode to "in head". - $insertionMode = $this->insertionMode = self::IN_HEAD_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # An end tag whose tag name is "noscript" - elseif ($token instanceof EndTagToken && $token->name === 'noscript') { - # Pop the current node (which will be a noscript element) from the stack of open - # elements; the new current node will be a head element. 
- $this->stack->pop(); - # Switch the insertion mode to "in head". - $this->insertionMode = self::IN_HEAD_MODE; - } - # An end tag whose name is "br" - # Act as described in the "anything else" entry below. - # Any other end tag - elseif ($token instanceof EndTagToken && $token->name !== 'br') { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - # A comment token - elseif ($token instanceof CommentToken || $token instanceof WhitespaceToken) { - # Process the token using the rules for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # Anything else - else { - # Parse error. - if ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - } elseif ($token instanceof EOFToken) { - $this->error(ParseError::UNEXPECTED_EOF); - } - # Pop the current node (which will be a noscript element) from the stack - # of open elements; the new current node will be a head element. - $this->stack->pop(); - # Switch the insertion mode to "in head". - $insertionMode = $this->insertionMode = self::IN_HEAD_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # 13.2.6.4.6. The "after head" insertion mode - elseif ($insertionMode === self::AFTER_HEAD_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED - # (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - // OPTIMIZATION: Will check for multiple space characters at once as character - // tokens can contain more than one character. - if ($token instanceof WhitespaceToken) { - # Insert the character. 
- $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === 'html') { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "body" - elseif ($token->name === 'body') { - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Switch the insertion mode to "in body". - $this->insertionMode = self::IN_BODY_MODE; - } - # A start tag whose tag name is "frameset" - elseif ($token->name === 'frameset') { - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Switch the insertion mode to "in frameset". - $this->insertionMode = self::IN_FRAMESET_MODE; - } - # A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", - # "meta", "noframes", "script", "style", "template", "title" - elseif (in_array($token->name, ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'])) { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # Push the node pointed to by the head element pointer onto the stack of open elements. - $this->stack[] = $this->headElement; - # Process the token using the rules for the "in head" insertion mode. 
- // The relevant rules for the mode are reproduced here in minimal form - if ($token->name === 'title') { - $this->parseGenericRCDATA($token); - } - elseif ($token->name === 'noframes' || $token->name === 'style') { - $this->parseGenericRawText($token); - } - elseif ($token->name === 'noscript') { - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_HEAD_NOSCRIPT_MODE; - } - elseif ($token->name === 'script') { - $this->insertStartTagToken($token); - $this->tokenizer->state = Tokenizer::SCRIPT_DATA_STATE; - $this->originalInsertionMode = $this->insertionMode; - $this->insertionMode = self::TEXT_MODE; - } - elseif ($token->name === 'template') { - $this->insertStartTagToken($token); - $this->activeFormattingElementsList->insertMarker(); - $this->framesetOk = false; - $this->insertionMode = self::IN_TEMPLATE_MODE; - $this->templateInsertionModes[] = self::IN_TEMPLATE_MODE; - } else { - $this->insertStartTagToken($token); - $this->stack->pop(); - $token->selfClosingAcknowledged = true; - } - # Remove the node pointed to by the head element pointer from the stack of open - # elements. (It might not be the current node at this point.) - $this->stack->removeSame($this->headElement); - } - # A start tag whose tag name is "head" - elseif ($token->name === 'head') { - # Parse error. Ignore the token - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Any other start tag - else { - # Act as described in the "anything else" entry below. - - # Insert an HTML element for a "body" start tag token with no attributes. - $this->insertStartTagToken(new StartTagToken('body')); - # Switch the insertion mode to "in body". - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - # Reprocess the current token. - goto ProcessToken; - } - } - elseif ($token instanceof EndTagToken) { - # An end tag whose tag name is "template" - if ($token->name === 'template') { - # Process the token using the rules for the "in head" insertion mode. 
- $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # An end tag whose tag name is one of: "body", "html", "br" - elseif (in_array($token->name, ['body', 'html', 'br'])) { - # Act as described in the "anything else" entry below. - # - # Insert an HTML element for a "body" start tag token with no attributes. - $this->insertStartTagToken(new StartTagToken('body')); - # Switch the insertion mode to "in body". - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - # Reprocess the current token. - goto ProcessToken; - } - # Any other end tag - else { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # Anything else - else { - # Insert an HTML element for a "body" start tag token with no attributes. - $this->insertStartTagToken(new StartTagToken('body')); - # Switch the insertion mode to "in body". - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - # Reprocess the current token. - goto ProcessToken; - } - } - # 13.2.6.4.8 The "text" insertion mode - elseif ($insertionMode === self::TEXT_MODE) { - # A character token - if ($token instanceof CharacterToken) { - # Insert the token's character. - $this->insertCharacterToken($token); - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Parse error. - $this->error(ParseError::UNEXPECTED_EOF); - # If the current node is a script element, mark the script - # element as "already started". - // DEVIATION: Scripting is not supported - # Pop the current node off the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to the original insertion mode and - # reprocess the token. 
- $insertionMode = $this->insertionMode = $this->originalInsertionMode; - goto ProcessToken; - } - # An end tag whose tag name is "script" - // DEVIATION: Scripting is not supported, so there is no special handling - # Any other end tag - elseif ($token instanceof EndTagToken) { - # Pop the current node off the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to the original insertion mode. - $this->insertionMode = $this->originalInsertionMode; - } - // Anything else - else { - // No other cases are possible - throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore - } - } - # 13.2.6.4.9 The "in table" insertion mode - elseif ($insertionMode === self::IN_TABLE_MODE) { - // NOTE: Foster parenting is turned off when evaluating this - // mode as it may have been turned on in a previous evluation - // of this mode - $this->fosterParenting = false; - # A character token, if the current node is table, tbody, tfoot, thead, or tr element - if ($token instanceof CharacterToken && in_array($this->stack->currentNodeName, ["table", "tbody", "tfoot", "thead", "tr"])) { - # Let the pending table character tokens be an empty list of tokens. - $this->pendingTableCharacterTokens = []; - # Let the original insertion mode be the current insertion mode. - $this->originalInsertionMode = $this->insertionMode; - # Switch the insertion mode to "in table text" and reprocess the token. - $insertionMode = $this->insertionMode = self::IN_TABLE_TEXT_MODE; - goto ProcessToken; - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "caption" - if ($token->name === "caption") { - # Clear the stack back to a table context. 
(See below.) - $this->stack->clearToTableContext(); - # Insert a marker at the end of the list of active - # formatting elements. - $this->activeFormattingElementsList->insertMarker(); - # Insert an HTML element for the token, then switch the - # insertion mode to "in caption". - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_CAPTION_MODE; - } - # A start tag whose tag name is "colgroup" - elseif ($token->name === "colgroup") { - # Clear the stack back to a table context. (See below.) - $this->stack->clearToTableContext(); - # Insert an HTML element for the token, then switch the - # insertion mode to "in column group". - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_COLUMN_GROUP_MODE; - } - # A start tag whose tag name is "col" - elseif ($token->name === "col") { - # Clear the stack back to a table context. (See below.) - $this->stack->clearToTableContext(); - # Insert an HTML element for a "colgroup" start tag token - # with no attributes, then switch the insertion mode to - # "in column group". - $this->insertStartTagToken(new StartTagToken("colgroup")); - $insertionMode = $this->insertionMode = self::IN_COLUMN_GROUP_MODE; - # Reprocess the current token. - goto ProcessToken; - } - # A start tag whose tag name is one of: "tbody", "tfoot", "thead" - elseif (in_array($token->name, ["tbody", "tfoot", "thead"])) { - # Clear the stack back to a table context. (See below.) - $this->stack->clearToTableContext(); - # Insert an HTML element for the token, then switch the - # insertion mode to "in table body". - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_TABLE_BODY_MODE; - } - # A start tag whose tag name is one of: "td", "th", "tr" - elseif (in_array($token->name, ["td", "th", "tr"])) { - # Clear the stack back to a table context. (See below.) 
- $this->stack->clearToTableContext(); - # Insert an HTML element for a "tbody" start tag token - # with no attributes, then switch the insertion mode - # to "in table body". - $this->insertStartTagToken(new StartTagToken("tbody")); - $insertionMode = $this->insertionMode = self::IN_TABLE_BODY_MODE; - # Reprocess the current token. - goto ProcessToken; - } - # A start tag whose tag name is "table" - elseif ($token->name === "table") { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If the stack of open elements does not have a table - # element in table scope, ignore the token. - if (!$this->stack->hasElementInTableScope("table")) { - // Ignore the token - } - # Otherwise: - else { - # Pop elements from this stack until a table element - # has been popped from the stack. - $this->stack->popUntil("table"); - # Reset the insertion mode appropriately. - $insertionMode = $this->resetInsertionMode(); - # Reprocess the token. - goto ProcessToken; - } - } - # A start tag whose tag name is one of: "style", "script", "template" - elseif (in_array($token->name, ["style", "script", "template"])) { - # Process the token using the rules for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "input" - elseif ($token->name === "input") { - # If the token does not have an attribute with the name - # "type", or if it does, but that attribute's value is - # not an ASCII case-insensitive match for the string - # "hidden", then: act as described in the - # "anything else" entry below. - if (!$token->hasAttribute("type") || strtolower($token->getAttribute("type")->value) !== "hidden") { - goto InTableAnythingElse; - } - # Otherwise: - else { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - # Pop that input element off the stack of open elements. 
- $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - } - } - # A start tag whose tag name is "form" - elseif ($token->name === "form") { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If there is a template element on the stack of open - # elements, or if the form element pointer is not null, - # ignore the token. - if ($this->formElement || $this->stack->find("template") > -1) { - // Ignore the token - } - # Otherwise: - else { - # Insert an HTML element for the token, and set the form - # element pointer to point to the element created. - $element = $this->insertStartTagToken($token); - $this->formElement = $element; - # Pop that form element off the stack of open elements. - $this->stack->pop(); - } - } - // Any other start tag - else { - goto InTableAnythingElse; - } - } - # An end tag... - elseif ($token instanceof EndTagToken) { - # An end tag whose tag name is "table" - if ($token->name === "table") { - # If the stack of open elements does not have a table - # element in table scope, this is a parse error; - # ignore the token. - if (!$this->stack->hasElementInTableScope("table")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Pop elements from this stack until a table element - # has been popped from the stack. - $this->stack->popUntil("table"); - # Reset the insertion mode appropriately. - $this->resetInsertionMode(); - } - } - # An end tag whose tag name is one of: "body", "caption", - # "col", "colgroup", "html", "tbody", "td", "tfoot", "th", - # "thead", "tr" - elseif (in_array($token->name, ["body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"])) { - # Parse error. Ignore the token. 
- $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # An end tag whose tag name is "template" - elseif ($token->name === "template") { - # Process the token using the rules for the "in head" - # insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - // Any other end tag - else { - goto InTableAnythingElse; - } - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Process the token using the rules for the "in body" - # insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # Anything else - else { - InTableAnythingElse: - # Parse error. Enable foster parenting, process the token - # using the rules for the "in body" insertion mode, and - # then disable foster parenting. - if ($token instanceof CharacterToken) { - $this->error(ParseError::FOSTERED_CHAR); - } elseif ($token instanceof StartTagToken) { - $this->error(ParseError::FOSTERED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::FOSTERED_END_TAG, $token->name); - } - $this->fosterParenting = true; - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - // NOTE: Foster parenting will be turned off when re-entering this mode with the next token - } - } - # 13.2.6.4.10 The "in table text" insertion mode - elseif ($insertionMode === self::IN_TABLE_TEXT_MODE) { - # A character token that is U+0000 NULL - if ($token instanceof NullCharacterToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - } - # Any other character token - elseif ($token instanceof CharacterToken) { - # Append the character token to the pending table character - # tokens list. 
- $this->pendingTableCharacterTokens[] = $token; - } - # Anything else - else { - $ws = true; - foreach ($this->pendingTableCharacterTokens as $pending) { - if (!$pending instanceof WhitespaceToken) { - $ws = false; - break; - } - } - # If any of the tokens in the pending table character tokens - # list are character tokens that are not ASCII whitespace, - # then this is a parse error: reprocess the character tokens - # in the pending table character tokens list using the rules - # given in the "anything else" entry in the "in table" - # insertion mode. - // NOTE: This is efectively the same as reprocessing in the - // "in body" mode - if (!$ws) { - $this->error(ParseError::UNEXPECTED_CHAR); - $this->fosterParenting = true; - foreach ($this->pendingTableCharacterTokens as $pending) { - // The relevant parts of the "in body" mode are reproduced here - $this->reconstructActiveFormattingElements(); - if ($pending instanceof NullCharacterToken) { - // Ignore the token - } elseif ($pending instanceof WhitespaceToken) { - $this->insertCharacterToken($pending); - } else { - $this->insertCharacterToken($pending); - $this->framesetOk = false; - } - } - $this->fosterParenting = false; - } - # Otherwise, insert the characters given by the pending table - # character tokens list. - else { - foreach ($this->pendingTableCharacterTokens as $pending) { - $this->insertCharacterToken($pending); - } - } - $this->pendingTableCharacterTokens = []; - # Switch the insertion mode to the original insertion mode - # and reprocess the token. - $insertionMode = $this->insertionMode = $this->originalInsertionMode; - goto ProcessToken; - } - } - # 13.2.6.4.11 The "in caption" insertion mode - elseif ($insertionMode === self::IN_CAPTION_MODE) { - # An end tag whose tag name is "caption" - if ($token instanceof EndTagToken && $token->name === "caption") { - # If the stack of open elements does not have a caption - # element in table scope, this is a parse error; ignore - # the token. 
(fragment case) - if (!$this->stack->hasElementInTableScope("caption")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # Now, if the current node is not a caption element, - # then this is a parse error. - if ($this->stack->currentNodeName !== "caption") { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from this stack until a caption element - # has been popped from the stack. - $this->stack->popUntil("caption"); - # Clear the list of active formatting elements up to - # the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # Switch the insertion mode to "in table". - $this->insertionMode = self::IN_TABLE_MODE; - } - } - # A start tag whose tag name is one of: "caption", "col", - # "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr" - # An end tag whose tag name is "table" - elseif ( - ($token instanceof StartTagToken && in_array($token->name, ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"])) - || ($token instanceof EndTagToken && $token->name === "table") - ) { - $errorCode = ($token instanceof StartTagToken) ? ParseError::UNEXPECTED_START_TAG : ParseError::UNEXPECTED_END_TAG; - # If the stack of open elements does not have a caption - # element in table scope, this is a parse error; ignore - # the token. (fragment case) - if (!$this->stack->hasElementInTableScope("caption")) { - $this->error($errorCode, $token->name); - } - # Otherwise: - else { - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # Now, if the current node is not a caption element, - # then this is a parse error. - if ($this->stack->currentNodeName !== "caption") { - $this->error($errorCode, $token->name); - } - # Pop elements from this stack until a caption element - # has been popped from the stack. 
- $this->stack->pop("caption"); - # Clear the list of active formatting elements up to - # the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # Switch the insertion mode to "in table". - $insertionMode = $this->insertionMode = self::IN_TABLE_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # An end tag whose tag name is one of: "body", "col", "colgroup", - # "html", "tbody", "td", "tfoot", "th", "thead", "tr" - elseif ($token instanceof EndTagToken && in_array($token->name, ["body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"])) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Anything else - else { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.12 The "in column group" insertion mode - elseif ($insertionMode === self::IN_COLUMN_GROUP_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - if ($token instanceof WhitespaceToken) { - # Insert the character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag whose tag name is "html" - elseif ($token instanceof StartTagToken && $token->name === "html") { - # Process the token using the rules for the "in body" - # insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "col" - elseif ($token instanceof StartTagToken && $token->name === "col") { - # Insert an HTML element for the token. 
Immediately pop - # the current node off the stack of open elements. - $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - } - # An end tag whose tag name is "colgroup" - elseif ($token instanceof EndTagToken && $token->name === "colgroup") { - # If the current node is not a colgroup element, - # then this is a parse error; ignore the token. - if ($this->stack->currentNodeName !== "colgroup") { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, pop the current node from the stack of open - # elements. Switch the insertion mode to "in table". - else { - $this->stack->pop(); - $this->insertionMode = self::IN_TABLE_MODE; - } - } - # An end tag whose tag name is "col" - elseif ($token instanceof EndTagToken && $token->name === "col") { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # A start tag whose tag name is "template" - # An end tag whose tag name is "template" - elseif ($token instanceof TagToken && $token->name === "template") { - # Process the token using the rules for - # the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # Anything else - else { - # If the current node is not a colgroup element, then this - # is a parse error; ignore the token. 
- if ($this->stack->currentNodeName !== "colgroup") { - if ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - } elseif ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # Otherwise, pop the current node from the stack - # of open elements. - # Switch the insertion mode to "in table". - # Reprocess the token. - else { - $this->stack->pop(); - $insertionMode = $this->insertionMode = self::IN_TABLE_MODE; - goto ProcessToken; - } - } - } - # 13.2.6.4.13 The "in table body" insertion mode - elseif ($insertionMode === self::IN_TABLE_BODY_MODE) { - // NOTE: Foster parenting is turned off when evaluating this - // mode as it may have been turned on in a previous evluation - // of the "in table" mode - $this->fosterParenting = false; - # A start tag whose tag name is "tr" - if ($token instanceof StartTagToken && $token->name === "tr") { - # Clear the stack back to a table body context. (See below.) - $this->stack->clearToTableBodyContext(); - # Insert an HTML element for the token, then switch the - # insertion mode to "in row". - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_ROW_MODE; - } - # A start tag whose tag name is one of: "th", "td" - elseif ($token instanceof StartTagToken && ($token->name === "td" || $token->name === "th")) { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # Clear the stack back to a table body context. (See below.) - $this->stack->clearToTableBodyContext(); - # Insert an HTML element for a "tr" start tag token with no - # attributes, then switch the insertion mode to "in row". - $this->insertStartTagToken(new StartTagToken("tr")); - $insertionMode = $this->insertionMode = self::IN_ROW_MODE; - # Reprocess the current token. 
- goto ProcessToken; - } - # An end tag whose tag name is one of: "tbody", "tfoot", "thead" - elseif ($token instanceof EndTagToken && (in_array($token->name, ["tbody", "tfoot", "thead"]))) { - # If the stack of open elements does not have an element in - # table scope that is an HTML element with the same tag name - # as the token, this is a parse error; ignore the token. - if (!$this->stack->hasElementInTableScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Clear the stack back to a table body context. - $this->stack->clearToTableBodyContext(); - # Pop the current node from the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to "in table". - $this->insertionMode = self::IN_TABLE_MODE; - } - } - # A start tag whose tag name is one of: "caption", "col", - # "colgroup", "tbody", "tfoot", "thead" - # An end tag whose tag name is "table" - elseif ( - ($token instanceof StartTagToken && in_array($token->name, ["caption", "col", "colgroup", "tbody", "tfoot", "thead"])) - || ($token instanceof EndTagToken && $token->name === "table") - ) { - # If the stack of open elements does not have a tbody, thead, - # or tfoot element in table scope, this is a parse error; - # ignore the token. - if (!$this->stack->hasElementInTableScope("tbody", "tfoot", "thead")) { - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } else { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # Otherwise: - else { - # Clear the stack back to a table body context. - $this->stack->clearToTableBodyContext(); - # Pop the current node from the stack of open elements. - $this->stack->pop(); - # Switch the insertion mode to "in table". - $insertionMode = $this->insertionMode = self::IN_TABLE_MODE; - # Reprocess the token. 
- goto ProcessToken; - } - } - # An end tag whose tag name is one of: "body", "caption", "col", - # "colgroup", "html", "td", "th", "tr" - elseif ($token instanceof EndTagToken && in_array($token->name, ["body", "caption", "col", "colgroup", "html", "td", "th", "tr"])) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Anything else - else { - # Process the token using the rules for - # the "in table" insertion mode. - $insertionMode = self::IN_TABLE_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.14 The "in row" insertion mode - elseif ($insertionMode === self::IN_ROW_MODE) { - // NOTE: Foster parenting is turned off when evaluating this - // mode as it may have been turned on in a previous evluation - // of the "in table" mode - $this->fosterParenting = false; - # A start tag whose tag name is one of: "th", "td" - if ($token instanceof StartTagToken && ($token->name === "th" || $token->name === "td")) { - # Clear the stack back to a table row context. - $this->stack->clearToTableRowContext(); - # Insert an HTML element for the token, then - # switch the insertion mode to "in cell". - $this->insertStartTagToken($token); - $this->insertionMode = self::IN_CELL_MODE; - # Insert a marker at the end of the list of active - # formatting elements. - $this->activeFormattingElementsList->insertMarker(); - } - # An end tag whose tag name is "tr" - elseif ($token instanceof EndTagToken && $token->name === "tr") { - # If the stack of open elements does not have a tr element - # in table scope, this is a parse error; ignore the token. - if (!$this->stack->hasElementInTableScope("tr")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Clear the stack back to a table row context. - $this->stack->clearToTableRowContext(); - # Pop the current node (which will be a tr element) from - # the stack of open elements. Switch the insertion - # mode to "in table body". 
- $this->stack->pop(); - $this->insertionMode = self::IN_TABLE_BODY_MODE; - } - } - # A start tag whose tag name is one of: "caption", "col", - # "colgroup", "tbody", "tfoot", "thead", "tr" - # An end tag whose tag name is "table" - elseif ( - ($token instanceof StartTagToken && in_array($token->name, ["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"])) - || ($token instanceof EndTagToken && $token->name === "table") - ) { - # If the stack of open elements does not have a tr element - # in table scope, this is a parse error; ignore the token. - if (!$this->stack->hasElementInTableScope("tr")) { - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } else { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # Otherwise: - else { - # Clear the stack back to a table row context. - $this->stack->clearToTableRowContext(); - # Pop the current node (which will be a tr element) - # from the stack of open elements. Switch the - # insertion mode to "in table body". - $this->stack->pop(); - $insertionMode = $this->insertionMode = self::IN_TABLE_BODY_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # An end tag whose tag name is one of: "tbody", "tfoot", "thead" - elseif ($token instanceof EndTagToken && (in_array($token->name, ["tbody", "tfoot", "thead"]))) { - # If the stack of open elements does not have an element - # in table scope that is an HTML element with the same - # tag name as the token, this is a parse error; - # ignore the token. - if (!$this->stack->hasElementInTableScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # If the stack of open elements does not have a tr element - # in table scope, ignore the token. - elseif (!$this->stack->hasElementInTableScope("tr")) { - // Ignore the token - } - # Otherwise: - else { - # Clear the stack back to a table row context. 
- $this->stack->clearToTableRowContext(); - # Pop the current node (which will be a tr element) from - # the stack of open elements. Switch the insertion mode - # to "in table body". - $this->stack->pop(); - $insertionMode = $this->insertionMode = self::IN_TABLE_BODY_MODE; - # Reprocess the token. - goto ProcessToken; - } - } - # An end tag whose tag name is one of: "body", "caption", "col", - # "colgroup", "html", "td", "th" - elseif ($token instanceof EndTagToken && in_array($token->name, ["body", "caption", "col", "colgroup", "html", "td", "th"])) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Anything else - else { - # Process the token using the rules for the - # "in table" insertion mode. - $insertionMode = self::IN_TABLE_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.15 The "in cell" insertion mode - elseif ($insertionMode === self::IN_CELL_MODE) { - # An end tag whose tag name is one of: "td", "th" - if ($token instanceof EndTagToken && ($token->name === "td" || $token->name === "th")) { - # If the stack of open elements does not have an element in - # table scope that is an HTML element with the same tag - # name as that of the token, then this is a parse error; - # ignore the token. - if (!$this->stack->hasElementInTableScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # Now, if the current node is not an HTML element with - # the same tag name as the token, then this is - # a parse error. - if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements stack - # until an HTML element with the same tag name as the - # token has been popped from the stack. 
- $this->stack->popUntil($token->name); - # Clear the list of active formatting elements up to the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # Switch the insertion mode to "in row". - $this->insertionMode = self::IN_ROW_MODE; - } - } - # A start tag whose tag name is one of: "caption", "col", - # "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr" - elseif ($token instanceof StartTagToken && in_array($token->name, ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"])) { - # If the stack of open elements does not have a td or th - # element in table scope, then this is a parse error; - # ignore the token. (fragment case) - if (!$this->stack->hasElementInTableScope("td", "th")) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - # Otherwise, close the cell (see below) and reprocess the token. - else { - $insertionMode = $this->closeCell($token); - goto ProcessToken; - } - } - # An end tag whose tag name is one of: "body", "caption", "col", - # "colgroup", "html" - elseif ($token instanceof EndTagToken && in_array($token->name, ["body", "caption", "col", "colgroup", "html"])) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # An end tag whose tag name is one of: "table", "tbody", - # "tfoot", "thead", "tr" - elseif ($token instanceof EndTagToken && in_array($token->name, ["table", "tbody", "tfoot", "thead", "tr"])) { - # If the stack of open elements does not have an element in - # table scope that is an HTML element with the same tag - # name as that of the token, then this is a parse error; - # ignore the token. - if (!$this->stack->hasElementInTableScope($token->name)) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, close the cell (see below) and reprocess the token. 
- else { - $insertionMode = $this->closeCell($token); - goto ProcessToken; - } - } - # Anything else - else { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.16 The "in select" insertion mode - elseif ($insertionMode === self::IN_SELECT_MODE) { - # A character token that is U+0000 NULL - if ($token instanceof NullCharacterToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - } - # Any other character token - elseif ($token instanceof CharacterToken) { - # Insert the token's character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === "html") { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "option" - elseif ($token->name === "option") { - # If the current node is an option element, pop that - # node from the stack of open elements. - if ($this->stack->currentNodeName === "option") { - $this->stack->pop(); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is "optgroup" - elseif ($token->name === "optgroup") { - # If the current node is an option element, pop that - # node from the stack of open elements. - if ($this->stack->currentNodeName === "option") { - $this->stack->pop(); - } - # If the current node is an optgroup element, pop that - # node from the stack of open elements. 
- if ($this->stack->currentNodeName === "optgroup") { - $this->stack->pop(); - } - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is "select" - elseif ($token->name === "select") { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If the stack of open elements does not have a select - # element in select scope, ignore the token. (fragment case) - if (!$this->stack->hasElementInSelectScope("select")) { - // Ignore the token - } - # Otherwise: - else { - # Pop elements from the stack of open elements until - # a select element has been popped from the stack. - $this->stack->popUntil("select"); - # Reset the insertion mode appropriately. - $this->resetInsertionMode(); - } - } - # A start tag whose tag name is one of: "input", "keygen", "textarea" - elseif (in_array($token->name, ["input", "keygen", "textarea"])) { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # If the stack of open elements does not have a select - # element in select scope, ignore the token. (fragment case) - if (!$this->stack->hasElementInSelectScope("select")) { - // Ignore the token - } - # Otherwise: - else { - # Pop elements from the stack of open elements until - # a select element has been popped from the stack. - $this->stack->popUntil("select"); - # Reset the insertion mode appropriately. - $insertionMode = $this->resetInsertionMode(); - # Reprocess the token. - goto ProcessToken; - } - } - # A start tag whose tag name is one of: "script", "template" - elseif ($token->name === "script" || $token->name === "template") { - # Process the token using the rules for the - # "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - // Any other start tag - else { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - } - # An end tag... 
- elseif ($token instanceof EndTagToken) { - # An end tag whose tag name is "template" - if ($token->name === "tenplate") { - # Process the token using the rules for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # An end tag whose tag name is "optgroup" - elseif ($token->name === "optgroup") { - # First, if the current node is an option element, and - # the node immediately before it in the stack of open - # elements is an optgroup element, then pop the current - # node from the stack of open elements. - if ($this->stack->currentNodeName === "option" && $this->stack->top(1)->nodeName === "optgroup") { - $this->stack->pop(); - } - # If the current node is an optgroup element, then pop - # that node from the stack of open elements. - if ($this->stack->currentNodeName === "optgroup") { - $this->stack->pop(); - } - # Otherwise, this is a parse error; ignore the token. - else { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # An end tag whose tag name is "option" - elseif ($token->name === "option") { - # If the current node is an option element, then pop - # that node from the stack of open elements. - if ($this->stack->currentNodeName === "option") { - $this->stack->pop(); - } - # Otherwise, this is a parse error; ignore the token. - else { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # An end tag whose tag name is "select" - elseif ($token->name === "select") { - # If the stack of open elements does not have a select - # element in select scope, this is a parse error; - # ignore the token. (fragment case) - if (!$this->stack->hasElementInSelectScope("select")) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise: - else { - # Pop elements from the stack of open elements until - # a select element has been popped from the stack. - $this->stack->popUntil("select"); - # Reset the insertion mode appropriately. 
- $this->resetInsertionMode(); - } - } - // Any other end tag - else { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Process the token using the rules for the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # Anything else - else { - # Parse error. Ignore the token. - // NOTE: All other cases are start or end tags handled above - throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore - } - } - # 13.2.6.4.17 The "in select in table" insertion mode - elseif ($insertionMode === self::IN_SELECT_IN_TABLE_MODE) { - # A start tag whose tag name is one of: "caption", "table", - # "tbody", "tfoot", "thead", "tr", "td", "th" - if ($token instanceof StartTagToken && in_array($token->name, ["caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"])) { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # Pop elements from the stack of open elements until a - # select element has been popped from the stack. - $this->stack->popUntil("select"); - # Reset the insertion mode appropriately. - $insertionMode = $this->resetInsertionMode(); - # Reprocess the token. - goto ProcessToken; - } - # An end tag whose tag name is one of: "caption", "table", - # "tbody", "tfoot", "thead", "tr", "td", "th" - elseif ($token instanceof EndTagToken && in_array($token->name, ["caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"])) { - # Parse error. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - # If the stack of open elements does not have an element in - # table scope that is an HTML element with the same tag name - # as that of the token, then ignore the token. 
- if (!$this->stack->hasElementInTableScope($token->name)) { - // Ignore the token - } - # Otherwise: - else { - # Pop elements from the stack of open elements until a - # select element has been popped from the stack. - $this->stack->popUntil("select"); - # Reset the insertion mode appropriately. - $insertionMode = $this->resetInsertionMode(); - # Reprocess the token. - goto ProcessToken; - } - } - # Anything else - else { - # Process the token using the rules for the - # "in select" insertion mode. - $insertionMode = self::IN_SELECT_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.18 The "in template" insertion mode - elseif ($insertionMode === self::IN_TEMPLATE_MODE) { - # A character token - # A comment token - # A DOCTYPE token - if ($token instanceof CharacterToken || $token instanceof CommentToken || $token instanceof DOCTYPEToken) { - # Process the token using the rules for the - # "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is one of: "base", "basefont", - # "bgsound", "link", "meta", "noframes", "script", "style", - # "template", "title" - if (in_array($token->name, ["base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "template", "title"])) { - # Process the token using the rules for the - # "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # A start tag whose tag name is one of: "caption", - # "colgroup", "tbody", "tfoot", "thead" - elseif (in_array($token->name, ["caption", "colgroup", "tbody", "tfoot", "thead"])) { - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Push "in table" onto the stack of template insertion - # modes so that it is the new current - # template insertion mode. 
- $this->templateInsertionModes[] = self::IN_TABLE_MODE; - # Switch the insertion mode to "in table", and - # reprocess the token. - $insertionMode = $this->insertionMode = self::IN_TABLE_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "col" - elseif ($token->name === "col") { - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Push "in column group" onto the stack of template - # insertion modes so that it is the new current - # template insertion mode. - $this->templateInsertionModes[] = self::IN_COLUMN_GROUP_MODE; - # Switch the insertion mode to "in column group", and - # reprocess the token. - $insertionMode = $this->insertionMode = self::IN_COLUMN_GROUP_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "tr" - elseif ($token->name === "tr") { - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Push "in table body" onto the stack of template - # insertion modes so that it is the new current - # template insertion mode. - $this->templateInsertionModes[] = self::IN_TABLE_BODY_MODE; - # Switch the insertion mode to "in table body", - # and reprocess the token. - $insertionMode = $this->insertionMode = self::IN_TABLE_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is one of: "td", "th" - elseif ($token->name === "td" || $token->name === "th") { - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Push "in row" onto the stack of template insertion - # modes so that it is the new current template - # insertion mode. - $this->templateInsertionModes[] = self::IN_ROW_MODE; - # Switch the insertion mode to "in row", - # and reprocess the token. 
- $insertionMode = $this->insertionMode = self::IN_ROW_MODE; - goto ProcessToken; - } - # Any other start tag - else { - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Push "in body" onto the stack of template insertion - # modes so that it is the new current template - # insertion mode. - $this->templateInsertionModes[] = self::IN_BODY_MODE; - # Switch the insertion mode to "in body", - # and reprocess the token. - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - } - # An end tag whose tag name is "template" - elseif ($token instanceof EndTagToken && $token->name === "template") { - # Process the token using the rules for the - # "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # Any other end tag - elseif ($token instanceof EndTagToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # If there is no template element on the stack of open - # elements, then stop parsing. (fragment case) - if (!$this->stack->find("template") === -1) { - // Stop parsing - } - else { - # Otherwise, this is a parse error. - $this->error(ParseError::UNEXPECTED_EOF); - # Pop elements from the stack of open elements until - # a template element has been popped from the stack. - $this->stack->popUntil("template"); - # Clear the list of active formatting elements up to - # the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # Pop the current template insertion mode off the stack - # of template insertion modes. - $this->templateInsertionModes->pop(); - # Reset the insertion mode appropriately. - $insertionMode = $this->resetInsertionMode(); - # Reprocess the token. 
- goto ProcessToken; - } - } - } - # 13.2.6.4.19 The "after body" insertion mode - elseif ($insertionMode === self::AFTER_BODY_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - if ($token instanceof WhitespaceToken) { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment as the last child of the first element - # in the stack of open elements (the html element). - $this->insertCommentToken($token, $this->stack[0]); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag whose tag name is "html" - elseif ($token instanceof StartTagToken && $token->name === "html") { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # An end tag whose tag name is "html" - elseif ($token instanceof EndTagToken && $token->name === "html") { - # If the parser was created as part of the HTML fragment - # parsing algorithm, this is a parse error; - # ignore the token. (fragment case) - if ($this->fragmentContext) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Otherwise, switch the insertion mode to "after after body". - else { - $this->insertionMode = self::AFTER_AFTER_BODY_MODE; - } - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Stop parsing. - return; - } - # Anything else - else { - # Parse error. 
- assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - } - # Switch the insertion mode to "in body" - # and reprocess the token. - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.20 The "in frameset" insertion mode - elseif ($insertionMode === self::IN_FRAMESET_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - if ($token instanceof WhitespaceToken) { - # Insert the character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is "html" - if ($token->name === "html") { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # A start tag whose tag name is "frameset" - elseif ($token->name === "frameset") { - # Insert an HTML element for the token. - $this->insertStartTagToken($token); - } - # A start tag whose tag name is "frame" - elseif ($token->name === "frame") { - # Insert an HTML element for the token. Immediately pop - # the current node off the stack of open elements. 
- $this->insertStartTagToken($token); - $this->stack->pop(); - # Acknowledge the token's self-closing flag, if it is set. - $token->selfClosingAcknowledged = true; - } - # A start tag whose tag name is "noframes" - elseif ($token->name === "noframes") { - # Process the token using the rules - # for the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - // Any other start tag - else { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } - } - # An end tag whose tag name is "frameset" - elseif ($token instanceof EndTagToken && $token->name === "frameset") { - # If the current node is the root html element, then this - # is a parse error; ignore the token. (fragment case) - if (count($this->stack) < 2) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - else { - # Otherwise, pop the current node from - # the stack of open elements. - $this->stack->pop(); - # If the parser was not created as part of the HTML - # fragment parsing algorithm (fragment case), and the - # current node is no longer a frameset element, then switch - # the insertion mode to "after frameset". - if (!$this->fragmentContext && $this->stack->currentNodeName !== "frameset") { - $this->insertionMode = self::AFTER_FRAMESET_MODE; - } - } - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # If the current node is not the root html element, - # then this is a parse error. - if (count($this->stack) > 1) { - $this->error(ParseError::UNEXPECTED_EOF); - } - # Stop parsing. - return; - } - # Anything else - else { - # Parse error. Ignore the token. 
- assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - // Extract any whitespace characters from the token and insert them - $ws = preg_replace('/[^\x09\x0a\x0c\x0d ]+/', "", $token->data); - if (strlen($ws)) { - $this->insertCharacterToken(new WhitespaceToken($ws)); - } - } - } - } - # 13.2.6.4.21 The "after frameset" insertion mode - elseif ($insertionMode === self::AFTER_FRAMESET_MODE) { - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - if ($token instanceof WhitespaceToken) { - # Insert the character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag whose tag name is "html" - elseif ($token instanceof StartTagToken && $token->name === "html") { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # An end tag whose tag name is "html" - elseif ($token instanceof EndTagToken && $token->name === "html") { - # Switch the insertion mode to "after after frameset". 
- $this->insertionMode = self::AFTER_AFTER_FRAMESET_MODE; - } - # A start tag whose tag name is "noframes" - elseif ($token instanceof StartTagToken && $token->name === "noframes") { - # Process the token using the rules for - # the "in head" insertion mode. - $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Stop parsing. - return; - } - # Anything else - else { - # Parse error. Ignore the token. - assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - // Extract any whitespace characters from the token and insert them - $ws = preg_replace('/[^\x09\x0a\x0c\x0d ]+/', "", $token->data); - if (strlen($ws)) { - $this->insertCharacterToken(new WhitespaceToken($ws)); - } - } - } - } - # 13.2.6.4.22 The "after after body" insertion mode - elseif ($insertionMode === self::AFTER_AFTER_BODY_MODE) { - # A comment token - if ($token instanceof CommentToken) { - # Insert a comment as the last child of the Document object. - $this->insertCommentToken($token, $this->DOM); - } - # A DOCTYPE token - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - # A start tag whose tag name is "html" - elseif ($token instanceof DOCTYPEToken || $token instanceof WhitespaceToken || ($token instanceof StartTagToken && $token->name === "html")) { - # Process the token using the rules for - # the "in body" insertion mode. 
- $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Stop parsing. - return; - } - # Anything else - else { - # Parse error. - assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - } - # Switch the insertion mode to "in body" and reprocess the token. - $insertionMode = $this->insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - } - # 13.2.6.4.23 The "after after frameset" insertion mode - elseif ($insertionMode === self::AFTER_AFTER_FRAMESET_MODE) { - # A comment token - if ($token instanceof CommentToken) { - # Insert a comment as the last child of the Document object. - $this->insertCommentToken($token, $this->DOM); - } - # A DOCTYPE token - # A character token that is one of U+0009 CHARACTER TABULATION, - # U+000A LINE FEED (LF), U+000C FORM FEED (FF), - # U+000D CARRIAGE RETURN (CR), or U+0020 SPACE - # A start tag whose tag name is "html" - elseif ($token instanceof DOCTYPEToken || $token instanceof WhitespaceToken || ($token instanceof StartTagToken && $token->name === "html")) { - # Process the token using the rules for - # the "in body" insertion mode. - $insertionMode = self::IN_BODY_MODE; - goto ProcessToken; - } - # An end-of-file token - elseif ($token instanceof EOFToken) { - # Stop parsing. - return; - } - # A start tag whose tag name is "noframes" - elseif ($token instanceof StartTagToken && $token->name === "noframes") { - # Process the token using the rules for - # the "in head" insertion mode. 
- $insertionMode = self::IN_HEAD_MODE; - goto ProcessToken; - } - # Anything else - else { - # Parse error. Ignore the token. - assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token))); - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } elseif ($token instanceof EndTagToken) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } elseif ($token instanceof CharacterToken) { - $this->error(ParseError::UNEXPECTED_CHAR); - } - } - } - else { - throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore - } - } - # Otherwise - else { - # Process the token according to the rules given in the section - # for parsing tokens in foreign content. - - assert((function() { - $this->debugLog .= " Mode: Foreign content (".(string) $this->stack.")\n"; - return true; - })()); - - # 13.2.6.5 The rules for parsing tokens in foreign content - # - # When the user agent is to apply the rules for parsing tokens in foreign - # content, the user agent must handle the token as follows: - - - - // NOTE: Foster parenting is turned off when evaluating this - // mode as it may have been turned on in a previous evluation - // of the "in table" mode - $this->fosterParenting = false; - # A character token that is U+0000 NULL - if ($token instanceof NullCharacterToken) { - # Parse error. Insert a U+FFFD REPLACEMENT CHARACTER character. - // DEVIATION: Parse errors for null characters are already emitted by the tokenizer - $this->insertCharacterToken(new CharacterToken("\u{FFFD}")); - } - # A character token that is one of U+0009 CHARACTER TABULATION, "LF" (U+000A), - # "FF" (U+000C), "CR" (U+000D), or U+0020 SPACE - elseif ($token instanceof WhitespaceToken) { - # Insert the token's character. 
- $this->insertCharacterToken($token); - } - # Any other character token - elseif ($token instanceof CharacterToken) { - # Set the frameset-ok flag to "not ok". - $this->framesetOk = false; - # Insert the token's character. - $this->insertCharacterToken($token); - } - # A comment token - elseif ($token instanceof CommentToken) { - # Insert a comment. - $this->insertCommentToken($token); - } - # A DOCTYPE token - elseif ($token instanceof DOCTYPEToken) { - # Parse error. Ignore the token. - $this->error(ParseError::UNEXPECTED_DOCTYPE); - } - # A start tag... - elseif ($token instanceof StartTagToken) { - # A start tag whose tag name is one of: "b", "big", "blockquote", "body", "br", - # "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", - # "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", - # "nobr", "ol", "p", "pre", "ruby", "s", "small", "span", "strong", "strike", - # "sub", "sup", "table", "tt", "u", "ul", "var" - # A start tag whose tag name is "font", if the token has any attributes named - # "color", "face", or "size" - if ( - in_array($token->name, ['b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd', 'div', 'dl', 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong', 'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var']) - || ($token->name === 'font' && ($token->hasAttribute('color') || $token->hasAttribute('face') || $token->hasAttribute('size')) ) - ) { - # Parse error. - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - # While the current node is not a MathML text integration - # point, an HTML integration point, or an element in the - # HTML namespace, pop elements from the stack of - # open elements. 
- while (($node = $this->stack->currentNode) && !($node->namespaceURI === null || $this->isMathMLTextIntegrationPoint($node) || $this->isHTMLIntegrationPoint($node))) { - $this->stack->pop(); - } - # Process the token using the rules for the - # "in body" insertion mode. - // DEVIATION: Spec bug - // See https://github.com/whatwg/html/issues/6439 - goto ProcessToken; - } - # Any other start tag - else { - foreignContentAnyOtherStartTag: - $currentNodeNamespace = $this->stack->currentNodeNamespace; - # If the adjusted current node is an element in the SVG namespace, and the - # token’s tag name is one of the ones in the first column of the following - # table, change the tag name to the name given in the corresponding cell in the - # second column. (This fixes the case of SVG elements that are not all - # lowercase.) - if ($this->stack->adjustedCurrentNodeNamespace === Parser::SVG_NAMESPACE) { - $token->name = self::SVG_TAG_NAME_MAP[$token->name] ?? $token->name; - } - foreach ($token->attributes as $a) { - # If the current node is an element in the MathML namespace, adjust MathML - # attributes for the token. (This fixes the case of MathML attributes that are - # not all lowercase.) - if ($currentNodeNamespace === Parser::MATHML_NAMESPACE && $a->name === 'definitionurl') { - $a->name = 'definitionURL'; - } - # If the current node is an element in the SVG namespace, adjust SVG attributes - # for the token. (This fixes the case of SVG attributes that are not all - # lowercase.) - elseif ($currentNodeNamespace === Parser::SVG_NAMESPACE) { - $a->name = self::SVG_ATTR_NAME_MAP[$a->name] ?? $a->name; - } - # Adjust foreign attributes for the token. (This fixes the use of namespaced - # attributes, in particular XLink in SVG.) 
- # When the steps below require the user agent to adjust foreign attributes for a - # token, then, if any of the attributes on the token match the strings given in - # the first column of the following table, let the attribute be a namespaced - # attribute, with the prefix being the string given in the corresponding cell in - # the second column, the local name being the string given in the corresponding - # cell in the third column, and the namespace being the namespace given in the - # corresponding cell in the fourth column. (This fixes the use of namespaced - # attributes, in particular lang attributes in the XML namespace.) - // DOMElement::setAttributeNS requires the prefix and local name be in one - // string, so there is no need to separate the prefix and the local name here. - $a->namespace = self::FOREIGN_ATTRIBUTE_NAMESPACE_MAP[$a->name] ?? null; - } - # Insert a foreign element for the token, in the same namespace as the adjusted - # current node. - $this->insertStartTagToken($token, null, $this->stack->adjustedCurrentNode->namespaceURI); - # If the token has its self-closing flag set, then run the appropriate steps - # from the following list: - if ($token->selfClosing) { - # If the token’s tag name is "script", and the new current node is in the SVG - # namespace - // DEVIATION: This implementation does not support scripting, so script elements - // aren't processed differently. - # Otherwise - # Pop the current node off the stack of open elements and acknowledge the - # token’s *self-closing flag*. - $this->stack->pop(); - $token->selfClosingAcknowledged = true; - } - } - } - # An end tag whose tag name is "script", if the current node is a script element - # in the SVG namespace - // DEVIATION: This implementation does not support scripting, so script elements - // aren't processed differently. 
- # Any other end tag - elseif ($token instanceof EndTagToken) { - # Run these steps: - # - # Initialize node to be the current node (the bottommost node of the stack). - // We do this below before the loop - # If node's tag name, converted to ASCII lowercase, is not the - # same as the tag name of the token, then this is a parse error. - // DEVIATION: We only generate the parse error if we don't reach - // "Otherwise" below, to avoid reporting the parse error a second - // time in HTML content parsing - $pos = count($this->stack) - 1; - $node = $this->stack[$pos]; - do { - # Loop: If node is the topmost element in the stack of open elements, then return. (fragment case) - if ($pos === 0) { - if (strtolower($this->stack->currentNodeName) !== $token->name) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - continue 2; - } - # If node's tag name, converted to ASCII lowercase, is the same as the - # tag name of the token, pop elements from the stack of open elements until node - # has been popped from the stack, and then abort these steps. - if (strtolower($node->nodeName) === $token->name) { - if (strtolower($this->stack->currentNodeName) !== $token->name) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - $this->stack->popUntilSame($node); - continue 2; - } - # Set node to the previous entry in the stack of open elements. - $node = $this->stack[--$pos]; - # If node is not an element in the HTML namespace, return to the step labeled - # loop. - } while ($node->namespaceURI !== null); - # Otherwise, process the token according to the rules given in the section - # corresponding to the current insertion mode in HTML content. - goto ProcessToken; - } - } - # When a start tag token is emitted with its self-closing flag set, if the flag - # is not acknowledged when it is processed by the tree construction stage, that - # is a non-void-html-element-start-tag-with-trailing-solidus parse error. 
- if ($token instanceof StartTagToken && $token->selfClosing && !$token->selfClosingAcknowledged) { - $this->error(ParseError::NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS, $token->name); - } - } - } - - protected function adopt(TagToken $token): void { - # The adoption agency algorithm, which takes as its only argument a - # token 'token' for which the algorithm is being run, consists of - # the following steps: - - assert((function() { - $this->debugLog .= " Adoption agency (".(string) $this->stack.")\n"; - return true; - })()); - - # Let subject be token's tag name. - # If the current node is an HTML element whose tag name is subject, - # and the current node is not in the list of active formatting elements, - # then pop the current node off the stack of open elements, and return. - if ( - $this->stack->currentNodeNamespace === null - && $this->stack->currentNodeName === $token->name - && $this->activeFormattingElementsList->findSame($this->stack->currentNode) === -1 - ) { - $this->stack->pop(); - return; - } - $errorCode = $token instanceof StartTagToken ? ParseError::UNEXPECTED_START_TAG : ParseError::UNEXPECTED_END_TAG; - # Let outer loop counter be zero. - $outerLoopCounter = 0; - # Outer loop: If outer loop counter is greater than or equal to eight, then return. - OuterLoop: - if ($outerLoopCounter >= 8) { - return; - } - # Increment outer loop counter by one. - $outerLoopCounter++; - # Let formatting element be the last element in the list of active - # formatting elements that: - # 1. is between the end of the list and the last marker in the list, - # if any, or the start of the list otherwise, and - # 2. has the tag name subject. 
- $formattingElementIndex = $this->activeFormattingElementsList->findToMarker($token->name); - if ($formattingElementIndex > -1) { - $formattingElement = $this->activeFormattingElementsList[$formattingElementIndex]['element']; - $formattingToken = $this->activeFormattingElementsList[$formattingElementIndex]['token']; - } else { - $formattingElement = null; - } - # If there is no such element, then return and instead act as - # described in the "any other end tag" entry above. - if (!$formattingElement) { - // NOTE: The "entry above" refers to the "in body" insertion mode - // Changes here should be mirrored there - foreach ($this->stack as $node) { - if ($node->nodeName === $token->name && $node->namespaceURI === null) { - $this->stack->generateImpliedEndTags($token->name); - if (!$node->isSameNode($this->stack->currentNode)) { - $this->error($errorCode, $token->name); - } - $this->stack->popUntilSame($node); - return; - } elseif ($this->isElementSpecial($node)) { - $this->error($errorCode, $token->name); - return; - } - } - } - # If formatting element is not in the stack of open elements, - # then this is a parse error; remove the element from the - # list, and return. - if (($stackIndex = $this->stack->findSame($formattingElement)) === -1) { - $this->error($errorCode, $token->name); - unset($this->activeFormattingElementsList[$formattingElementIndex]); - return; - } - # If formatting element is in the stack of open elements, but - # the element is not in scope, then this is a parse error; return. - if (!$this->stack->hasElementInScope($formattingElement)) { - $this->error($errorCode, $token->name); - return; - } - # If formatting element is not the current node, this is a - # parse error. (But do not return.) 
- if (!$formattingElement->isSameNode($this->stack->currentNode)) { - $this->error($errorCode, $token->name); - } - # Let furthest block be the topmost node in the stack of open elements that - # is lower in the stack than formatting element, and is an element in the - # special category. There might not be one. - $furthestBlock = null; - for ($k = ($stackIndex + 1); $k < count($this->stack); $k++) { - if ($this->isElementSpecial($this->stack[$k])) { - $furthestBlockIndex = $k; - $furthestBlock = $this->stack[$k]; - break; - } - } - # If there is no furthest block, then the UA must first pop all the nodes - # from the bottom of the stack of open elements, from the current node up - # to and including formatting element, then remove formatting element from - # the list of active formatting elements, and finally return. - if (!$furthestBlock) { - $this->stack->popUntilSame($formattingElement); - $this->activeFormattingElementsList->removeSame($formattingElement); - return; - } - # Let common ancestor be the element immediately above formatting element - # in the stack of open elements. - $commonAncestor = $this->stack[$stackIndex - 1] ?? null; - # Let a bookmark note the position of formatting element in the list of - # active formatting elements relative to the elements on either side - # of it in the list. - $bookmark = $formattingElementIndex; - # Let node and last node be furthest block. Follow these steps: - $node = $furthestBlock; - $nodeIndex = $furthestBlockIndex; - $lastNode = $furthestBlock; - # Let inner loop counter be zero. - $innerLoopCounter = 0; - # Inner loop: Increment inner loop counter by one. - InnerLoop: - $innerLoopCounter++; - # Let node be the element immediately above node in the stack of open - # elements, or if node is no longer in the stack of open elements - # (e.g. because it got removed by this algorithm), the element that - # was immediately above node in the stack of open elements before - # node was removed. 
- $node = $this->stack[--$nodeIndex]; - # If node is formatting element, then go to the next step in the - # overall algorithm. - if ($node->isSameNode($formattingElement)) { - $nodeListPos = $formattingElementIndex; - goto AfterInnerLoop; - } - # If inner loop counter is greater than three and node is in the - # list of active formatting elements, then remove node from the - # list of active formatting elements. - $nodeListPos = $this->activeFormattingElementsList->findSame($node); - if ($innerLoopCounter > 3 && $nodeListPos > -1) { - $this->activeFormattingElementsList->removeSame($node); - if ($bookmark > $nodeListPos) { - $bookmark--; - } - $nodeListPos = -1; - } - # If node is not in the list of active formatting elements, then - # remove node from the stack of open elements and then go back to - # the step labeled inner loop. - if ($nodeListPos === -1) { - $this->stack->removeSame($node); - goto InnerLoop; - } - # Create an element for the token for which the element node was - # created, in the HTML namespace, with common ancestor as the - # intended parent; replace the entry for node in the list of - # active formatting elements with an entry for the new element, - # replace the entry for node in the stack of open elements with - # an entry for the new element, and let node be the new element. - $nodeToken = $this->activeFormattingElementsList[$nodeListPos]['token']; - $element = $this->createElementForToken($nodeToken, null, $commonAncestor); - $this->activeFormattingElementsList[$nodeListPos] = ['token' => $nodeToken, 'element' => $element]; - $this->stack[$nodeIndex] = $element; - $node = $element; - # If last node is furthest block, then move the aforementioned - # bookmark to be immediately after the new node in the list of - # active formatting elements. - if ($lastNode->isSameNode($furthestBlock)) { - $bookmark = $nodeListPos + 1; - } - # Insert last node into node, first removing it from its previous - # parent node if any. 
- if ($lastNode->parentNode) { - $lastNode->parentNode->removeChild($lastNode); - } - $node->appendChild($lastNode); - # Let last node be node. - $lastNode = $node; - # Return to the step labeled inner loop. - goto InnerLoop; - # Insert whatever last node ended up being in the previous step - # at the appropriate place for inserting a node, but using - # common ancestor as the override target. - AfterInnerLoop: - $place = $this->appropriatePlaceForInsertingNode($commonAncestor); - if ($place['insert before']) { - $place['node']->parentNode->insertBefore($lastNode, $place['node']); - } else { - $place['node']->appendChild($lastNode); - } - # Create an element for the token for which formatting element was - # created, in the HTML namespace, with furthest block as the - # intended parent. - $element = $this->createElementForToken($formattingToken, null, $furthestBlock); - # Take all of the child nodes of furthest block and append them to - # the element created in the last step. - while ($furthestBlock->hasChildNodes()) { - $element->appendChild($furthestBlock->firstChild); - } - # Append that new element to furthest block. - $furthestBlock->appendChild($element); - # Remove formatting element from the list of active formatting - # elements, and insert the new element into the list of active - # formatting elements at the position of the aforementioned bookmark. - $this->activeFormattingElementsList->insert($formattingToken, $element, $bookmark); - $this->activeFormattingElementsList->removeSame($formattingElement); - # Remove formatting element from the stack of open elements, and - # insert the new element into the stack of open elements - # immediately below the position of furthest block in that stack. - assert($stackIndex > 0, new Exception(Exception::STACK_ROOT_ELEMENT_DELETE)); - $this->stack->removeSame($formattingElement); - $this->stack->insert($element, $this->stack->findSame($furthestBlock) + 1); - # Jump back to the step labeled outer loop. 
- goto OuterLoop; - } - - protected function appropriatePlaceForInsertingNode(\DOMNode $overrideTarget = null): array { - $insertBefore = false; - # 13.2.6.1. Creating and inserting nodes - # - # While the parser is processing a token, it can enable or disable foster - # parenting. This affects the following algorithm. - # - # The appropriate place for inserting a node, optionally using a particular - # override target, is the position in an element returned by running the - # following steps: - - # If there was an override target specified, then let target - # be the override target. Otherwise, let target be the current node. - $target = $overrideTarget ?? $this->stack->currentNode; - assert(isset($target), new Exception(Exception::STACK_INCORRECTLY_EMPTY)); - # Determine the adjusted insertion location using the first matching steps - # from the following list: - $targetNodeName = $target->nodeName; - # If foster parenting is enabled and target is a table, tbody, tfoot, thead, or tr element - if ($this->fosterParenting && ($targetNodeName === 'table' || $targetNodeName === 'tbody' || $targetNodeName === 'tfoot' || $targetNodeName === 'thead' || $targetNodeName === 'tr')) { - # Run these substeps: - # - # 1. Let last template be the last template element in the stack of open - # elements, if any. - $lastTemplateIndex = $this->stack->find('template'); - $lastTemplate = ($lastTemplateIndex > -1 ) ? $this->stack[$lastTemplateIndex] : null; - # 2. Let last table be the last table element in the stack of open elements, if - # any. - $lastTableIndex = $this->stack->find('table'); - $lastTable = ($lastTableIndex > -1 ) ? $this->stack[$lastTableIndex] : null; - # 3. 
If there is a last template and either there is no last table, or there is - # one, but last template is lower (more recently added) than last table in the - # stack of open elements, then: let adjusted insertion location be inside last - # template’s template contents, after its last child (if any), and abort these - # substeps. - if ($lastTemplate && (!$lastTable || ($lastTemplateIndex > $lastTableIndex))) { - // DEVIATION: We don't implement template contents in the parser itself - $insertionLocation = $lastTemplate; - // Abort! - } - # 4. If there is no last table, then let adjusted insertion location be inside - # the first element in the stack of open elements (the html element), after its - # last child (if any), and abort these substeps. (fragment case) - elseif (!$lastTable) { - $insertionLocation = $this->stack[0]; - // Abort! - } - # 5. If last table has a parent node, then let adjusted insertion location be - # inside last table’s parent node, immediately before last table, and abort - # these substeps. - elseif ($lastTable->parentNode) { - $insertionLocation = $lastTable; - $insertBefore = true; - // Abort! - } - else { - # 6. Let previous element be the element immediately above last table in the - # stack of open elements. - $previousElement = $this->stack[$lastTableIndex - 1]; - # 7. Let adjusted insertion location be inside previous element, after its last - # child (if any). - $insertionLocation = $previousElement; - } - } - # Otherwise let adjusted insertion location be inside target, after its last - # child (if any). - else { - $insertionLocation = $target; - } - # 3. If the adjusted insertion location is inside a template element, let it - # instead be inside the template element’s template contents, after its last - # child (if any). 
- if ($insertionLocation instanceof Element && $insertionLocation->nodeName === 'template' && $insertionLocation->namespaceURI === null) { - // DEVIATION: We don't implement template contents in the parser itself - $insertionLocation = $insertionLocation; - } - # 4. Return the adjusted insertion location. - return [ - 'node' => $insertionLocation, - 'insert before' => $insertBefore - ]; - } - - public function insertCharacterToken(CharacterToken $token): void { - # 1. Let data be the characters passed to the algorithm, or, if no characters - # were explicitly specified, the character of the character token being - # processed. - // Already provided through the token object. - - # 2. Let the adjusted insertion location be the appropriate place for inserting - # a node. - $location = $this->appropriatePlaceForInsertingNode(); - $adjustedInsertionLocation = $location['node']; - $insertBefore = $location['insert before']; - assert($adjustedInsertionLocation instanceof \DOMNode, new Exception(Exception::TREEBUILDER_INVALID_INSERTION_LOCATION)); - # 3. If the adjusted insertion location is in a Document node, then abort these - # steps. - // NOTE: foster parenting will never point to before the root element - if ($adjustedInsertionLocation instanceof \DOMDocument) { - return; - } - - # 4. If there is a Text node immediately before the adjusted insertion location, - # then append data to that Text node’s data. - $previousSibling = ($insertBefore === false) ? $adjustedInsertionLocation->lastChild : $adjustedInsertionLocation->previousSibling; - if ($previousSibling instanceof \DOMText) { - $previousSibling->data .= $token->data; - return; - } - - # Otherwise, create a new Text node whose data is data and whose node document - # is the same as that of the element in which the adjusted insertion location - # finds itself, and insert the newly created node at the adjusted insertion - # location. 
- $textNode = $adjustedInsertionLocation->ownerDocument->createTextNode($token->data); - - if ($insertBefore === false) { - $adjustedInsertionLocation->appendChild($textNode); - } else { - $adjustedInsertionLocation->parentNode->insertBefore($textNode, $adjustedInsertionLocation); - } - } - - public function insertCommentToken(CommentToken $token, \DOMNode $position = null): void { - # When the steps below require the user agent to insert a comment while - # processing a comment token, optionally with an explicitly insertion position - # position, the user agent must run the following steps: - - # 1. Let data be the data given in the comment token being processed. - // Already provided through the token object. - # 2. If position was specified, then let the adjusted insertion location be - # position. Otherwise, let adjusted insertion location be the appropriate place - # for inserting a node. - // OPTIMIZATION: Comments are never foster-parented - $position = $position ?? $this->appropriatePlaceForInsertingNode()['node']; - # 3. Create a Comment node whose data attribute is set to data and whose node - # document is the same as that of the node in which the adjusted insertion - # location finds itself. - # 4. Insert the newly created node at the adjusted insertion location. - $position->appendChild($this->DOM->createComment($token->data)); - } - - public function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): \DOMElement { - # When the steps below require the user agent to insert a foreign - # element for a token in a given namespace, the user agent must - # run these steps: - // Doing both foreign and HTML elements here because the only - // difference between the two is that foreign elements are inserted - // with a namespace and HTML elements are not. - # Let the adjusted insertion location be the appropriate place for inserting - # a node. 
- $location = $this->appropriatePlaceForInsertingNode($intendedParent); - # Let element be the result of creating an element for the token in the given - # namespace, with the intended parent being the element in which the adjusted - # insertion location finds itself. - $element = $this->createElementForToken($token, $namespace ?? $token->namespace, $intendedParent); - # 3. If it is possible to insert element at the adjusted insertion location, - # then: - # - 1. Push a new element queue onto the custom element reactions stack. - // DEVIATION: Unnecessary because there is no scripting in this implementation. - # - 2. Insert element at the adjusted insertion location. - if ($location['insert before'] === false) { - $location['node']->appendChild($element); - } else { - $location['node']->parentNode->insertBefore($element, $location['node']); - } - # - 3. Pop the element queue from the custom element reactions stack, and - # invoke custom element reactions in that queue. - // DEVIATION: Unnecessary because there is no scripting in this implementation. - # 4. Push element onto the stack of open elements so that it is the new current node. - $this->stack[] = $element; - # Return element. - return $element; - } - - protected function parseGenericText(StartTagToken $token, bool $RAWTEXT = true) { - # The generic raw text element parsing algorithm and the generic RCDATA element - # parsing algorithm consist of the following steps. These algorithms are always - # invoked in response to a start tag token. - - # 1. Insert an HTML element for the token. - $this->insertStartTagToken($token); - - # 2. If the algorithm that was invoked is the generic raw text element parsing - # algorithm, switch the tokenizer to the RAWTEXT state; otherwise the algorithm - # invoked was the generic RCDATA element parsing algorithm, switch the tokenizer - # to the RCDATA state. - $this->tokenizer->state = ($RAWTEXT === true) ? Tokenizer::RAWTEXT_STATE : Tokenizer::RCDATA_STATE; - - # 3. 
Let the original insertion mode be the current insertion mode. - $this->originalInsertionMode = $this->insertionMode; - - # 4. Then, switch the insertion mode to "text". - $this->insertionMode = self::TEXT_MODE; - } - - protected function parseGenericRawText(StartTagToken $token) { - $this->parseGenericText($token, true); - } - - protected function parseGenericRCDATA(StartTagToken $token) { - $this->parseGenericText($token, false); - } - - protected function resetInsertionMode(): int { - # When the steps below require the UA to reset the insertion mode appropriately, - # it means the UA must follow these steps: - - # 1. Let last be false. - $last = false; - # 2. Let node be the last node in the stack of open elements. - foreach($this->stack as $position => $node) { - # 3. Loop: If node is the first node in the stack of open elements, then set - # last to true, and, if the parser was originally created as part of the HTML - # fragment parsing algorithm (fragment case), set node to the context element - # passed to that algorithm. - if ($position === 0) { - $last = true; - if ($this->fragmentContext) { - $node = $this->fragmentContext; - } - } - $nodeName = $node->nodeName; - # 4. If node is a select element, run these substeps: - if ($nodeName === 'select') { - # 1. If last is true, jump to the step below labeled Done. - if ($last === false) { - # 2. Let ancestor be node. - # 3. Loop: If ancestor is the first node in the stack of - # open elements, jump to the step below labeled Done. - for ($ancestorPosition = $position; $ancestorPosition > 0;) { - # 4. Let ancestor be the node before ancestor in the stack of open elements. - $ancestor = $this->stack[--$ancestorPosition]; - # 5. If ancestor is a template node, jump to the step below labeled Done. - if ($ancestor->nodeName === 'template') { - break; - } - # 6. If ancestor is a table node, switch the insertion mode to "in select in - # table" and abort these steps. 
- if ($ancestor->nodeName === 'table') { - return $this->insertionMode = self::IN_SELECT_IN_TABLE_MODE; - } - # 7. Jump back to the step labeled Loop. - } - } - # 8. Done: Switch the insertion mode to "in select" and abort these steps. - return $this->insertionMode = self::IN_SELECT_MODE; - } - # 5. If node is a td or th element and last is false, then switch the insertion - # mode to "in cell" and abort these steps. - elseif (($nodeName === 'td' || $nodeName === 'th') && $last === false) { - return $this->insertionMode = self::IN_CELL_MODE; - } - # 6. If node is a tr element, then switch the insertion mode to "in row" and - # abort these steps. - # 7. If node is a tbody, thead, or tfoot element, then switch the insertion mode - # to "in table body" and abort these steps. - # 8. If node is a caption element, then switch the insertion mode to "in - # caption" and abort these steps. - # 9. If node is a colgroup element, then switch the insertion mode to "in column - # group" and abort these steps. - # 10. If node is a table element, then switch the insertion mode to "in table" - # and abort these steps. - # 13. If node is a body element, then switch the insertion mode to "in body" and - # abort these steps. - # 14. If node is a frameset element, then switch the insertion mode to "in - # frameset" and abort these steps. (fragment case) - elseif (($mode = self::APPROPRIATE_INSERTION_MODES[$nodeName] ?? null) !== null) { - return $this->insertionMode = $mode; - } - # 11. If node is a template element, then switch the insertion mode to the - # current template insertion mode and abort these steps. - elseif ($nodeName === 'template') { - return $this->insertionMode = $this->templateInsertionModes->currentMode; - } - # 12. If node is a head element and last is false, then switch the insertion - # mode to "in head" and abort these steps. - elseif ($nodeName === 'head' && $last === false) { - return $this->insertionMode = self::IN_HEAD_MODE; - } - # 15. 
If node is an html element, run these substeps: - elseif ($nodeName === 'html') { - # 1. If the head element pointer is null, switch the insertion mode to "before - # head" and abort these steps. (fragment case) - if ($this->headElement === null) { - return $this->insertionMode = self::BEFORE_HEAD_MODE; - } - # 2. Otherwise, the head element pointer is not null, switch the insertion mode - # to "after head" and abort these steps. - return $this->insertionMode = self::AFTER_HEAD_MODE; - } - # 16. If last is true, then switch the insertion mode to "in body" and abort - # these steps. (fragment case) - elseif ($last === true) { - return $this->insertionMode = self::IN_BODY_MODE; - } - # 17. Let node now be the node before node in the stack of open elements. - # 18. Return to the step labeled Loop. - } - } - - protected function closePElement(TagToken $token) { - # When the steps above say the UA is to close a p element, it means that the UA - # must run the following steps: - - # 1. Generate implied end tags, except for p elements. - $this->stack->generateImpliedEndTags("p"); - # 2. If the current node is not a p element, then this is a parse error. - $currentNodeName = $this->stack->currentNodeName; - if ($currentNodeName !== 'p') { - if ($token instanceof StartTagToken) { - $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); - } else { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - } - # 3. Pop elements from the stack of open elements until a p element has been - # popped from the stack. - $this->stack->popUntil('p'); - } - - protected function closeCell(TagToken $token): int { - # Where the steps above say to close the cell, - # they mean to run the following algorithm: - - # Generate implied end tags. - $this->stack->generateImpliedEndTags(); - # If the current node is not now a td element or a th element, - # then this is a parse error. 
- if (!in_array($this->stack->currentNodeName, ["td", "th"])) { - $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); - } - # Pop elements from the stack of open elements stack until a td - # element or a th element has been popped from the stack. - $this->stack->popUntil("td", "th"); - # Clear the list of active formatting elements up to the last marker. - $this->activeFormattingElementsList->clearToTheLastMarker(); - # Switch the insertion mode to "in row". - return $this->insertionMode = self::IN_ROW_MODE; - } - - protected function isElementSpecial(\DOMElement $element): bool { - $name = $element->nodeName; - $ns = $element->namespaceURI ?? Parser::HTML_NAMESPACE; - return in_array($name, self::SPECIAL_ELEMENTS[$ns] ?? []); - } - - protected function createElementForToken(TagToken $token, ?string $namespace = null, ?\DOMNode $intendedParent = null): \DOMElement { - // DEVIATION: Steps related to scripting have been elided entirely - # Let document be intended parent's node document. - # Let local name be the tag name of the token. - # Let element be the result of creating an element given document, - # localName, given namespace, null, and is. - try { - $element = $this->DOM->createElementNS($namespace, $token->name); - } catch (\DOMException $e) { - // The element name is invalid for XML - // Replace any offending characters with "UHHHHHH" where H are the - // uppercase hexadecimal digits of the character's code point - if ($namespace !== null) { - $qualifiedName = implode(":", array_map([$this, "coerceName"], explode(":", $token->name, 2))); - } else { - $qualifiedName = $this->coerceName($token->name); - } - $element = $this->DOM->createElementNS($namespace, $qualifiedName); - $this->mangledElements = true; - } - # Append each attribute in the given token to element. 
- foreach ($token->attributes as $attr) { - # If element has an xmlns attribute in the XMLNS namespace whose value - # is not exactly the same as the element's namespace, that is a - # parse error. Similarly, if element has an xmlns:xlink attribute in - # the XMLNS namespace whose value is not the XLink Namespace, that - # is a parse error. - // NOTE: The specification is silent as to how to handle these - // attributes. We assume these bad attributes should be dropped, - // since they break the DOM when added - if ($attr->name === "xmlns" && $namespace !== null && $attr->value !== $namespace) { - $this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns", $namespace); - } elseif ($attr->name === "xmlns:xlink" && $namespace !== null && $attr->value !== Parser::XLINK_NAMESPACE) { - $this->error(ParseError::INVALID_NAMESPACE_ATTRIBUTE_VALUE, "xmlns:xlink", Parser::XLINK_NAMESPACE); - } else { - $this->elementSetAttribute($element, $attr->namespace, $attr->name, $attr->value); - } - } - # Return element. 
- return $element; - } - - public function elementSetAttribute(\DOMElement $element, ?string $namespaceURI, string $qualifiedName, string $value): void { - if ($namespaceURI === Parser::XMLNS_NAMESPACE) { - // NOTE: We create attribute nodes so that xmlns attributes - // don't get lost; otherwise they cannot be serialized - $a = @$element->ownerDocument->createAttributeNS($namespaceURI, $qualifiedName); - if ($a === false) { - // The document element does not exist yet, so we need - // to insert this element into the document - $element->ownerDocument->appendChild($element); - $a = $element->ownerDocument->createAttributeNS($namespaceURI, $qualifiedName); - $element->ownerDocument->removeChild($element); - } - $a->value = $this->escapeString($value, true); - $element->setAttributeNodeNS($a); - } else { - try { - $element->setAttributeNS($namespaceURI, $qualifiedName, $value); - } catch (\DOMException $e) { - // The attribute name is invalid for XML - // Replace any offending characters with "UHHHHHH" where H are the - // uppercase hexadecimal digits of the character's code point - $element->ownerDocument->mangledAttributes = true; - if ($namespaceURI !== null) { - $qualifiedName = implode(":", array_map([$element, "coerceName"], explode(":", $qualifiedName, 2))); - } else { - $qualifiedName = $this->coerceName($qualifiedName); - } - $element->setAttributeNS($namespaceURI, $qualifiedName, $value); - $this->mangledAttributes = true; - } - if ($qualifiedName === "id" && $namespaceURI === null) { - $element->setIdAttribute($qualifiedName, true); - } - } - } - - public function isMathMLTextIntegrationPoint(\DOMElement $e): bool { - return ($e->namespaceURI === Parser::MATHML_NAMESPACE && (in_array($e->nodeName, ['mi', 'mo', 'mn', 'ms', 'mtext']))); - } - - public function isHTMLIntegrationPoint(\DOMElement $e): bool { - $encoding = strtolower((string)$e->getAttribute('encoding')); - return (( - $e->namespaceURI === Parser::MATHML_NAMESPACE && - $e->nodeName === 
'annotation-xml' && ( - $encoding === 'text/html' || $encoding === 'application/xhtml+xml' - ) - ) || ( - $e->namespaceURI === Parser::SVG_NAMESPACE && (in_array($e->nodeName, ['foreignObject', 'desc', 'title'])) - ) - ); - } - - public function reconstructActiveFormattingElements(): void { - # When the steps below require the UA to reconstruct the active formatting - # elements, the UA must perform the following steps: - # 1. If there are no entries in the list of active formatting elements, then - # there is nothing to reconstruct; stop this algorithm. - $last = count($this->activeFormattingElementsList) - 1; - if ($last < 0) { - return; - } - # 2. If the last (most recently added) entry in the list of active formatting - # elements is a marker, or if it is an element that is in the stack of open - # elements, then there is nothing to reconstruct; stop this algorithm. - $pos = $last; - $entry = $this->activeFormattingElementsList[$pos]; - if ($entry instanceof ActiveFormattingElementsMarker || $this->stack->findSame($entry['element']) > -1) { - return; - } - # 3. Let entry be the last (most recently added) element in the list of - # active formatting elements. - // Already done - while ($pos >= 0) { - # 4. Rewind: If there are no entries before entry in the list of active - # formatting elements, then jump to the step labeled Create. - if ($pos === 0) { - // DEVIATION: Instead don't increment position before breaking, unlike below - break; - } - # 5. Let entry be the entry one earlier than entry in the list of active - # formatting elements. - $entry = $this->activeFormattingElementsList[--$pos]; - # 6. If entry is neither a marker nor an element that is also in the stack of - # open elements, go to the step labeled Rewind. 
- // Instead break if it is a marker or present in the stack - if ($entry instanceof ActiveFormattingElementsMarker || $this->stack->findSame($entry['element']) > -1) { - // DEVIATION: We increment before breaking to avoid having two loop exit points - $pos++; - break; - } - } - while ($pos <= $last) { - # 7. Advance: Let entry be the element one later than entry in the list of - # active formatting elements. - // DEVIATION: We increment at the end of the loop since we incremented when necessary before breaking out of the earlier loop - $entry = $this->activeFormattingElementsList[$pos]; - # 8. Create: Insert an HTML element for the token for which the element entry - # was created, to obtain new element. - $element = $this->insertStartTagToken($entry['token']); - # 9. Replace the entry for entry in the list with an entry for new element. - $this->activeFormattingElementsList[$pos] = ['token' => $entry['token'], 'element' => $element]; - # 10. If the entry for new element in the list of active formatting elements is - # not the last entry in the list, return to the step labeled Advance. - $pos++; - } - } -} diff --git a/lib/ctype.php b/lib/ctype.php deleted file mode 100644 index b442148..0000000 --- a/lib/ctype.php +++ /dev/null @@ -1,34 +0,0 @@ -true,"b"=>true,"c"=>true,"d"=>true,"e"=>true,"f"=>true,"g"=>true,"h"=>true,"i"=>true,"j"=>true,"k"=>true,"l"=>true,"m"=>true,"n"=>true,"o"=>true,"p"=>true,"q"=>true,"r"=>true,"s"=>true,"t"=>true,"u"=>true,"v"=>true,"w"=>true,"x"=>true,"y"=>true,"z"=>true,"A"=>true,"B"=>true,"C"=>true,"D"=>true,"E"=>true,"F"=>true,"G"=>true,"H"=>true,"I"=>true,"J"=>true,"K"=>true,"L"=>true,"M"=>true,"N"=>true,"O"=>true,"P"=>true,"Q"=>true,"R"=>true,"S"=>true,"T"=>true,"U"=>true,"V"=>true,"W"=>true,"X"=>true,"Y"=>true,"Z"=>true,"0"=>true,"1"=>true,"2"=>true,"3"=>true,"4"=>true,"5"=>true,"6"=>true,"7"=>true,"8"=>true,"9"=>true][$str] ?? 
false; - } - - function ctype_alpha(string $str): bool { - return ["a"=>true,"b"=>true,"c"=>true,"d"=>true,"e"=>true,"f"=>true,"g"=>true,"h"=>true,"i"=>true,"j"=>true,"k"=>true,"l"=>true,"m"=>true,"n"=>true,"o"=>true,"p"=>true,"q"=>true,"r"=>true,"s"=>true,"t"=>true,"u"=>true,"v"=>true,"w"=>true,"x"=>true,"y"=>true,"z"=>true,"A"=>true,"B"=>true,"C"=>true,"D"=>true,"E"=>true,"F"=>true,"G"=>true,"H"=>true,"I"=>true,"J"=>true,"K"=>true,"L"=>true,"M"=>true,"N"=>true,"O"=>true,"P"=>true,"Q"=>true,"R"=>true,"S"=>true,"T"=>true,"U"=>true,"V"=>true,"W"=>true,"X"=>true,"Y"=>true,"Z"=>true][$str] ?? false; - } - - function ctype_upper(string $str): bool { - return ["A"=>true,"B"=>true,"C"=>true,"D"=>true,"E"=>true,"F"=>true,"G"=>true,"H"=>true,"I"=>true,"J"=>true,"K"=>true,"L"=>true,"M"=>true,"N"=>true,"O"=>true,"P"=>true,"Q"=>true,"R"=>true,"S"=>true,"T"=>true,"U"=>true,"V"=>true,"W"=>true,"X"=>true,"Y"=>true,"Z"=>true][$str] ?? false; - } - - function ctype_digit(string $str): bool { - return ["0"=>true,"1"=>true,"2"=>true,"3"=>true,"4"=>true,"5"=>true,"6"=>true,"7"=>true,"8"=>true,"9"=>true][$str] ?? false; - } - - function ctype_xdigit(string $str): bool { - return ["a"=>true,"b"=>true,"c"=>true,"d"=>true,"e"=>true,"f"=>true,"A"=>true,"B"=>true,"C"=>true,"D"=>true,"E"=>true,"F"=>true,"0"=>true,"1"=>true,"2"=>true,"3"=>true,"4"=>true,"5"=>true,"6"=>true,"7"=>true,"8"=>true,"9"=>true][$str] ?? false; - } -} diff --git a/lib/DOM/traits/ContainerNode.php b/lib/traits/ContainerNode.php similarity index 99% rename from lib/DOM/traits/ContainerNode.php rename to lib/traits/ContainerNode.php index 7656400..bab6710 100644 --- a/lib/DOM/traits/ContainerNode.php +++ b/lib/traits/ContainerNode.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // Node in the DOM spec is dirty. Many nodes which inherit from it inherit // methods it cannot use which all check for this and throw exceptions. 
This is diff --git a/lib/DOM/traits/DocumentOrElement.php b/lib/traits/DocumentOrElement.php similarity index 98% rename from lib/DOM/traits/DocumentOrElement.php rename to lib/traits/DocumentOrElement.php index 44739d8..8df55f4 100644 --- a/lib/DOM/traits/DocumentOrElement.php +++ b/lib/traits/DocumentOrElement.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // This exists because the DOM spec for some stupid reason doesn't give // DocumentFragment some methods. diff --git a/lib/DOM/traits/EscapeString.php b/lib/traits/EscapeString.php similarity index 98% rename from lib/DOM/traits/EscapeString.php rename to lib/traits/EscapeString.php index 0dfff10..2244615 100644 --- a/lib/DOM/traits/EscapeString.php +++ b/lib/traits/EscapeString.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; use MensBeam\Intl\Encoding\UTF8; diff --git a/lib/DOM/traits/LeafNode.php b/lib/traits/LeafNode.php similarity index 96% rename from lib/DOM/traits/LeafNode.php rename to lib/traits/LeafNode.php index 5367a93..822d47a 100644 --- a/lib/DOM/traits/LeafNode.php +++ b/lib/traits/LeafNode.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // Node in the DOM spec is dirty. Many nodes which inherit from it inherit // methods it cannot use which all check for this and throw exceptions. 
This is diff --git a/lib/DOM/traits/MagicProperties.php b/lib/traits/MagicProperties.php similarity index 98% rename from lib/DOM/traits/MagicProperties.php rename to lib/traits/MagicProperties.php index 0f1806d..02dc55b 100644 --- a/lib/DOM/traits/MagicProperties.php +++ b/lib/traits/MagicProperties.php @@ -5,7 +5,7 @@ */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; /** * Getters and setters in PHP sucks. Instead of having getter and setter diff --git a/lib/DOM/traits/Moonwalk.php b/lib/traits/Moonwalk.php similarity index 98% rename from lib/DOM/traits/Moonwalk.php rename to lib/traits/Moonwalk.php index 39d1eca..ce972ee 100644 --- a/lib/DOM/traits/Moonwalk.php +++ b/lib/traits/Moonwalk.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; trait Moonwalk { /** Generator which walks up the DOM. Nonstandard. */ diff --git a/lib/DOM/traits/MoonwalkShallow.php b/lib/traits/MoonwalkShallow.php similarity index 96% rename from lib/DOM/traits/MoonwalkShallow.php rename to lib/traits/MoonwalkShallow.php index db97f02..c52f988 100644 --- a/lib/DOM/traits/MoonwalkShallow.php +++ b/lib/traits/MoonwalkShallow.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; trait MoonwalkShallow { /** diff --git a/lib/DOM/traits/Node.php b/lib/traits/Node.php similarity index 95% rename from lib/DOM/traits/Node.php rename to lib/traits/Node.php index 9df990f..ad82c7c 100644 --- a/lib/DOM/traits/Node.php +++ b/lib/traits/Node.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; // Extensions to PHP's DOM cannot inherit from an extended Node parent, so a // trait is the next best thing... 
diff --git a/lib/DOM/traits/ParentNode.php b/lib/traits/ParentNode.php similarity index 99% rename from lib/DOM/traits/ParentNode.php rename to lib/traits/ParentNode.php index 20b6d14..16d9fc5 100644 --- a/lib/DOM/traits/ParentNode.php +++ b/lib/traits/ParentNode.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; if (version_compare(\PHP_VERSION, '8.0', '>=')) { # 4.2.6. Mixin ParentNode diff --git a/lib/DOM/traits/ToString.php b/lib/traits/ToString.php similarity index 93% rename from lib/DOM/traits/ToString.php rename to lib/traits/ToString.php index 07e4edc..898792b 100644 --- a/lib/DOM/traits/ToString.php +++ b/lib/traits/ToString.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; trait ToString { public function __toString(): string { diff --git a/lib/DOM/traits/Walk.php b/lib/traits/Walk.php similarity index 96% rename from lib/DOM/traits/Walk.php rename to lib/traits/Walk.php index a198665..3f5a397 100644 --- a/lib/DOM/traits/Walk.php +++ b/lib/traits/Walk.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; trait Walk { /** Generator which walks down the DOM. Nonstandard. 
*/ diff --git a/lib/DOM/traits/WalkShallow.php b/lib/traits/WalkShallow.php similarity index 95% rename from lib/DOM/traits/WalkShallow.php rename to lib/traits/WalkShallow.php index 33d2e09..bfd36fe 100644 --- a/lib/DOM/traits/WalkShallow.php +++ b/lib/traits/WalkShallow.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; trait WalkShallow { /** diff --git a/tests/bootstrap.php b/tests/bootstrap.php index d074e3c..d7b6e91 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -4,7 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML; +namespace MensBeam\HTML\DOM; const NS_BASE = __NAMESPACE__."\\"; define(NS_BASE."BASE", dirname(__DIR__).DIRECTORY_SEPARATOR); diff --git a/tests/cases/TestCharset.php b/tests/cases/TestCharset.php index 27e0ef4..81c3f24 100644 --- a/tests/cases/TestCharset.php +++ b/tests/cases/TestCharset.php @@ -4,12 +4,12 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML\TestCase; +namespace MensBeam\HTML\DOM\TestCase; -use MensBeam\HTML\Charset; +use MensBeam\HTML\DOM\Charset; /** - * @covers \MensBeam\HTML\Charset + * @covers \MensBeam\HTML\DOM\Charset */ class TestCharset extends \PHPUnit\Framework\TestCase { /** @dataProvider provideCharsets */ @@ -73,8 +73,8 @@ class TestCharset extends \PHPUnit\Framework\TestCase { $tests = []; $blacklist = []; $files = new \AppendIterator(); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | 
\FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); foreach ($files as $file) { if (!in_array(basename($file), $blacklist)) { $tests[] = $file; diff --git a/tests/cases/TestDOM.php b/tests/cases/TestDOM.php index c09c0ae..48a8cc5 100644 --- a/tests/cases/TestDOM.php +++ b/tests/cases/TestDOM.php @@ -4,16 +4,16 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML\TestCase; +namespace MensBeam\HTML\DOM\TestCase; -use MensBeam\HTML\Document; -use MensBeam\HTML\Parser; -use MensBeam\HTML\TemplateElement; +use MensBeam\HTML\DOM\Document; +use MensBeam\HTML\DOM\Parser; +use MensBeam\HTML\DOM\TemplateElement; class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideNamespacedElements - * @covers \MensBeam\HTML\Document::createElementNS + * @covers \MensBeam\HTML\DOM\Document::createElementNS */ public function testCreateNamespacedElements(?string $nsIn, string $nameIn, ?string $nsOut, string $local, string $prefix): void { $d = new Document; @@ -43,7 +43,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { } /** * @dataProvider provideBareElements - * @covers \MensBeam\HTML\Document::createElement + * @covers \MensBeam\HTML\DOM\Document::createElement */ public function testCreateBareElements(string $nameIn, $nameOut): void { $d = new Document; @@ -62,7 +62,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { ]; } - /** @covers \MensBeam\HTML\Document::createElementNS */ + /** @covers \MensBeam\HTML\DOM\Document::createElementNS */ public function testCreateTemplateElements(): void { $d = new Document; $t = $d->createElement("template"); @@ -87,7 +87,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideNamespacedAttributeCreations - * @covers \MensBeam\HTML\Document::createAttributeNS + * @covers 
\MensBeam\HTML\DOM\Document::createAttributeNS */ public function testCreateNamespacedAttributes(?string $nsIn, string $nameIn, string $local, string $prefix): void { $d = new Document; @@ -114,7 +114,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideBareAttributeCreations - * @covers \MensBeam\HTML\Document::createAttribute + * @covers \MensBeam\HTML\DOM\Document::createAttribute */ public function testCreateBareAttributes(string $nameIn, string $nameOut): void { $d = new Document; @@ -135,7 +135,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideNamespacedAttributeSettings - * @covers \MensBeam\HTML\Element::setAttributeNS + * @covers \MensBeam\HTML\DOM\Element::setAttributeNS */ public function testSetNamespoacedAttributes(?string $elementNS, ?string $attrNS, string $nameIn, string $nameOut): void { $d = new Document; @@ -171,7 +171,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideBareAttributeSettings - * @covers \MensBeam\HTML\Element::setAttribute + * @covers \MensBeam\HTML\DOM\Element::setAttribute */ public function testSetBareAttributes(?string $elementNS, string $nameIn, string $nameOut): void { $d = new Document; @@ -201,8 +201,8 @@ class TestDOM extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideAttributeNodeSettings - * @covers \MensBeam\HTML\Element::setAttributeNode - * @covers \MensBeam\HTML\Element::setAttributeNodeNS + * @covers \MensBeam\HTML\DOM\Element::setAttributeNode + * @covers \MensBeam\HTML\DOM\Element::setAttributeNodeNS */ public function testSetAttributeNodes(bool $ns, ?string $elementNS, ?string $attrNS, string $name): void { $d = new Document; @@ -259,9 +259,9 @@ class TestDOM extends \PHPUnit\Framework\TestCase { } /** - * @covers \MensBeam\HTML\Element::hasAttribute - * @covers \MensBeam\HTML\Element::getAttribute - * @covers \MensBeam\HTML\Element::getAttributeNS + * @covers \MensBeam\HTML\DOM\Element::hasAttribute + 
* @covers \MensBeam\HTML\DOM\Element::getAttribute + * @covers \MensBeam\HTML\DOM\Element::getAttributeNS */ public function testCheckForAttribute(): void { $d = new Document; @@ -305,7 +305,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase { $this->assertSame("ack", $e->getAttributeNS("fake_ns", "eek")); } - /** @covers \MensBeam\HTML\Element::__get */ + /** @covers \MensBeam\HTML\DOM\Element::__get */ public function testGetInnerAndOuterHtml(): void { $d = new Document; $d->appendChild($d->createElement("html")); diff --git a/tests/cases/TestSerializer.php b/tests/cases/TestSerializer.php index 9845b3f..ae5cc5c 100644 --- a/tests/cases/TestSerializer.php +++ b/tests/cases/TestSerializer.php @@ -4,19 +4,19 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML\TestCase; +namespace MensBeam\HTML\DOM\TestCase; -use MensBeam\HTML\Document; -use MensBeam\HTML\Parser; +use MensBeam\HTML\DOM\Document; +use MensBeam\HTML\DOM\Parser; /** - * @covers \MensBeam\HTML\Document - * @covers \MensBeam\HTML\DocumentFragment - * @covers \MensBeam\HTML\Element - * @covers \MensBeam\HTML\TemplateElement - * @covers \MensBeam\HTML\Comment - * @covers \MensBeam\HTML\Text - * @covers \MensBeam\HTML\ProcessingInstruction + * @covers \MensBeam\HTML\DOM\Document + * @covers \MensBeam\HTML\DOM\DocumentFragment + * @covers \MensBeam\HTML\DOM\Element + * @covers \MensBeam\HTML\DOM\TemplateElement + * @covers \MensBeam\HTML\DOM\Comment + * @covers \MensBeam\HTML\DOM\Text + * @covers \MensBeam\HTML\DOM\ProcessingInstruction */ class TestSerializer extends \PHPUnit\Framework\TestCase { /** @dataProvider provideStandardSerializerTests */ @@ -28,7 +28,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase { public function provideStandardSerializerTests(): iterable { $blacklist = []; $files = new \AppendIterator(); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | 
\FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); foreach ($files as $file) { $index = 0; $l = 0; diff --git a/tests/cases/TestTokenizer.php b/tests/cases/TestTokenizer.php index d120b7d..863cf80 100644 --- a/tests/cases/TestTokenizer.php +++ b/tests/cases/TestTokenizer.php @@ -4,31 +4,31 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML\TestCase; +namespace MensBeam\HTML\DOM\TestCase; -use MensBeam\HTML\Data; -use MensBeam\HTML\EOFToken; -use MensBeam\HTML\OpenElementsStack; -use MensBeam\HTML\ParseError; -use MensBeam\HTML\Tokenizer; -use MensBeam\HTML\CharacterToken; -use MensBeam\HTML\CommentToken; -use MensBeam\HTML\DOCTYPEToken; -use MensBeam\HTML\EndTagToken; -use MensBeam\HTML\NullCharacterToken; -use MensBeam\HTML\StartTagToken; -use MensBeam\HTML\TokenAttr; -use MensBeam\HTML\WhitespaceToken; +use MensBeam\HTML\DOM\Data; +use MensBeam\HTML\DOM\EOFToken; +use MensBeam\HTML\DOM\OpenElementsStack; +use MensBeam\HTML\DOM\ParseError; +use MensBeam\HTML\DOM\Tokenizer; +use MensBeam\HTML\DOM\CharacterToken; +use MensBeam\HTML\DOM\CommentToken; +use MensBeam\HTML\DOM\DOCTYPEToken; +use MensBeam\HTML\DOM\EndTagToken; +use MensBeam\HTML\DOM\NullCharacterToken; +use MensBeam\HTML\DOM\StartTagToken; +use MensBeam\HTML\DOM\TokenAttr; +use MensBeam\HTML\DOM\WhitespaceToken; /** - * @covers \MensBeam\HTML\Data - * @covers \MensBeam\HTML\Tokenizer - * @covers \MensBeam\HTML\CharacterToken - * @covers \MensBeam\HTML\CommentToken - * @covers \MensBeam\HTML\DataToken - * @covers \MensBeam\HTML\TagToken - * @covers \MensBeam\HTML\DOCTYPEToken - * @covers \MensBeam\HTML\TokenAttr + * @covers \MensBeam\HTML\DOM\Data + * @covers \MensBeam\HTML\DOM\Tokenizer + * @covers \MensBeam\HTML\DOM\CharacterToken + * @covers \MensBeam\HTML\DOM\CommentToken + * @covers 
\MensBeam\HTML\DOM\DataToken + * @covers \MensBeam\HTML\DOM\TagToken + * @covers \MensBeam\HTML\DOM\DOCTYPEToken + * @covers \MensBeam\HTML\DOM\TokenAttr */ class TestTokenizer extends \PHPUnit\Framework\TestCase { const STATE_MAP = [ @@ -87,8 +87,8 @@ class TestTokenizer extends \PHPUnit\Framework\TestCase { $tests = []; $blacklist = ["xmlViolation.test"]; $files = new \AppendIterator(); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); foreach ($files as $file) { if (!in_array(basename($file), $blacklist)) { $tests[] = $file; diff --git a/tests/cases/TestTreeConstructor.php b/tests/cases/TestTreeConstructor.php index 55c112b..c3e3d9c 100644 --- a/tests/cases/TestTreeConstructor.php +++ b/tests/cases/TestTreeConstructor.php @@ -4,31 +4,31 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\HTML\TestCase; +namespace MensBeam\HTML\DOM\TestCase; -use MensBeam\HTML\Data; -use MensBeam\HTML\LoopException; -use MensBeam\HTML\NotImplementedException; -use MensBeam\HTML\OpenElementsStack; -use MensBeam\HTML\ParseError; -use MensBeam\HTML\Parser; -use MensBeam\HTML\TemplateInsertionModesStack; -use MensBeam\HTML\Tokenizer; -use MensBeam\HTML\TreeBuilder; +use MensBeam\HTML\DOM\Data; +use MensBeam\HTML\DOM\LoopException; +use MensBeam\HTML\DOM\NotImplementedException; +use MensBeam\HTML\DOM\OpenElementsStack; 
+use MensBeam\HTML\DOM\ParseError; +use MensBeam\HTML\DOM\Parser; +use MensBeam\HTML\DOM\TemplateInsertionModesStack; +use MensBeam\HTML\DOM\Tokenizer; +use MensBeam\HTML\DOM\TreeBuilder; /** - * @covers \MensBeam\HTML\Document - * @covers \MensBeam\HTML\Element - * @covers \MensBeam\HTML\Tokenizer - * @covers \MensBeam\HTML\TreeBuilder - * @covers \MensBeam\HTML\ActiveFormattingElementsList - * @covers \MensBeam\HTML\TemplateInsertionModesStack - * @covers \MensBeam\HTML\OpenElementsStack - * @covers \MensBeam\HTML\Stack - * @covers \MensBeam\HTML\TagToken + * @covers \MensBeam\HTML\DOM\Document + * @covers \MensBeam\HTML\DOM\Element + * @covers \MensBeam\HTML\DOM\Tokenizer + * @covers \MensBeam\HTML\DOM\TreeBuilder + * @covers \MensBeam\HTML\DOM\ActiveFormattingElementsList + * @covers \MensBeam\HTML\DOM\TemplateInsertionModesStack + * @covers \MensBeam\HTML\DOM\OpenElementsStack + * @covers \MensBeam\HTML\DOM\Stack + * @covers \MensBeam\HTML\DOM\TagToken */ class TestTreeConstructor extends \PHPUnit\Framework\TestCase { - use \MensBeam\HTML\EscapeString; + use \MensBeam\HTML\DOM\EscapeString; protected $out; protected $depth; @@ -379,8 +379,8 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase { public function provideStandardTreeTests(): iterable { $blacklist = []; $files = new \AppendIterator(); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); - $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); + $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS 
| \FilesystemIterator::CURRENT_AS_PATHNAME)); foreach ($files as $file) { $index = 0; $l = 0;