Browse Source

Changing repo to HTML-DOM

element-classes
Dustin Wilson 3 years ago
parent
commit
5cc51cc6d5
  1. 49
      README.md
  2. 43
      RoboFile.php
  3. 25
      composer.json
  4. 286
      composer.lock
  5. 15
      docs/config.json
  6. 1
      docs/en/010_About.md
  7. 8
      docs/en/020_Installation.md
  8. 63
      docs/en/030_Document_Object_Model/010_Comment.md
  9. 28
      docs/en/030_Document_Object_Model/010_Document/010_construct.md
  10. 13
      docs/en/030_Document_Object_Model/010_Document/020_createEntityReference.md
  11. 46
      docs/en/030_Document_Object_Model/010_Document/020_load.md
  12. 46
      docs/en/030_Document_Object_Model/010_Document/020_loadHTML.md
  13. 9
      docs/en/030_Document_Object_Model/010_Document/020_loadHTMLFile.md
  14. 13
      docs/en/030_Document_Object_Model/010_Document/020_loadXML.md
  15. 43
      docs/en/030_Document_Object_Model/010_Document/020_save.md
  16. 9
      docs/en/030_Document_Object_Model/010_Document/020_saveHTMLFile.md
  17. 13
      docs/en/030_Document_Object_Model/010_Document/020_saveXML.md
  18. 13
      docs/en/030_Document_Object_Model/010_Document/020_validate.md
  19. 13
      docs/en/030_Document_Object_Model/010_Document/020_xinclude.md
  20. 147
      docs/en/030_Document_Object_Model/010_Document/index.md
  21. 24
      docs/en/030_Document_Object_Model/010_Element/010_getAttribute.md
  22. 26
      docs/en/030_Document_Object_Model/010_Element/010_getAttributeNS.md
  23. 100
      docs/en/030_Document_Object_Model/010_Element/index.md
  24. 55
      docs/en/030_Document_Object_Model/ContainerNode/010_appendChild.md
  25. 40
      docs/en/030_Document_Object_Model/ContainerNode/010_insertBefore.md
  26. 14
      docs/en/030_Document_Object_Model/ContainerNode/index.md
  27. 13
      docs/en/030_Document_Object_Model/LeafNode/010_appendChild.md
  28. 13
      docs/en/030_Document_Object_Model/LeafNode/010_insertBefore.md
  29. 13
      docs/en/030_Document_Object_Model/LeafNode/010_removeChild.md
  30. 13
      docs/en/030_Document_Object_Model/LeafNode/010_replaceChild.md
  31. 16
      docs/en/030_Document_Object_Model/LeafNode/index.md
  32. 43
      docs/en/030_Document_Object_Model/Moonwalk/010_moonwalk.md
  33. 11
      docs/en/030_Document_Object_Model/Moonwalk/index.md
  34. 13
      docs/en/030_Document_Object_Model/Node/010_C14N.md
  35. 13
      docs/en/030_Document_Object_Model/Node/010_C14NFile.md
  36. 12
      docs/en/030_Document_Object_Model/Node/index.md
  37. 45
      docs/en/030_Document_Object_Model/Walk/010_walk.md
  38. 11
      docs/en/030_Document_Object_Model/Walk/index.md
  39. 1
      docs/en/030_Document_Object_Model/index.md
  40. 1
      docs/index.md
  41. 9
      docs/theme/php/config.json
  42. 2
      docs/theme/php/daux.min.js
  43. 2
      docs/theme/php/php.css
  44. 324
      docs/theme/src/php.scss
  45. 2
      lib/AbstractDocument.php
  46. 184
      lib/ActiveFormattingElementsList.php
  47. 19
      lib/CharacterReference.php
  48. 432
      lib/Charset.php
  49. 2
      lib/Comment.php
  50. 2
      lib/DOMException.php
  51. 289
      lib/Data.php
  52. 0
      lib/Document.php
  53. 2
      lib/DocumentFragment.php
  54. 4
      lib/Element.php
  55. 2
      lib/ElementMap.php
  56. 109
      lib/Exception.php
  57. 10
      lib/LoopException.php
  58. 57
      lib/NameCoercion.php
  59. 10
      lib/NotImplementedException.php
  60. 372
      lib/OpenElementsStack.php
  61. 215
      lib/ParseError.php
  62. 21
      lib/ParseErrorDummy.php
  63. 20
      lib/ParseErrorEmitter.php
  64. 100
      lib/Parser.php
  65. 2
      lib/ProcessingInstruction.php
  66. 62
      lib/Stack.php
  67. 2
      lib/TemplateElement.php
  68. 19
      lib/TemplateInsertionModesStack.php
  69. 2
      lib/Text.php
  70. 120
      lib/Token.php
  71. 2
      lib/TokenList.php
  72. 3699
      lib/Tokenizer.php
  73. 4307
      lib/TreeBuilder.php
  74. 34
      lib/ctype.php
  75. 2
      lib/traits/ContainerNode.php
  76. 2
      lib/traits/DocumentOrElement.php
  77. 2
      lib/traits/EscapeString.php
  78. 2
      lib/traits/LeafNode.php
  79. 2
      lib/traits/MagicProperties.php
  80. 2
      lib/traits/Moonwalk.php
  81. 2
      lib/traits/MoonwalkShallow.php
  82. 2
      lib/traits/Node.php
  83. 2
      lib/traits/ParentNode.php
  84. 2
      lib/traits/ToString.php
  85. 2
      lib/traits/Walk.php
  86. 2
      lib/traits/WalkShallow.php
  87. 2
      tests/bootstrap.php
  88. 10
      tests/cases/TestCharset.php
  89. 34
      tests/cases/TestDOM.php
  90. 22
      tests/cases/TestSerializer.php
  91. 48
      tests/cases/TestTokenizer.php
  92. 44
      tests/cases/TestTreeConstructor.php

49
README.md

@ -1,48 +1,3 @@
# HTML #
# HTML DOM #
Tools for parsing and printing HTML5 documents and fragments.
```php
<?php
$dom = MensBeam\HTML\Parser::parse('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?>
```
or:
```php
<?php
$dom = new MensBeam\HTML\Document;
$dom->loadHTML('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?>
```
## Comparison with `masterminds/html5` ##
This library and [masterminds/html5](https://packagist.org/packages/masterminds/html5) serve similar purposes. Generally, we are more accurate, but they are much faster. The following table summarizes the main functional differences.
| | DOMDocument | Masterminds | MensBeam |
|-----------------------------------------------------|---------------------------------------|----------------------------------------------------------|----------------------------------------|
| Minimum PHP version | 5.0 | 5.3 | 7.1 |
| Extensions required | dom | dom, ctype, mbstring or iconv | dom |
| Target HTML version | HTML 4.01 | HTML 5.0 | WHATWG Living Standard |
| Supported encodings | System-dependent | System-dependent | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#character-encodings) |
| Encoding detection | BOM, http-equiv | None | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding) (Steps 1-5 & 9) |
| Fallback encoding | ISO 8859-1 | UTF-8, configurable | Windows-1252, configurable |
| Handling of invalid characters | Bytes are passed through | Characters are dropped | [Per specification](https://encoding.spec.whatwg.org/#concept-encoding-process) |
| Handling of invalid XML element names | Variable | Name is changed to "invalid" | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#coercing-an-html-dom-into-an-infoset) |
| Handling of invalid XML attribute names | Variable | Attribute is dropped | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#coercing-an-html-dom-into-an-infoset) |
| Handling of misnested tags | Parent end tags always close children | Parent end tags always close children | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser) |
| Handling of data between table cells | Left as-is | Left as-is | [Per specification](https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser) |
| Handling of omitted start tags | Elements are not inserted | Elements are not inserted | Per specification |
| Handling of processing instructions | Processing instructions are retained | Processing instructions are retained | Per specification |
| Handling of bogus XLink namespace\* | Foreign content not supported | XLink attributes are lost if preceded by bogus namespace | Bogus namespace is ignored |
| Namespace for HTML elements | Null | Per specification, configurable | Null |
| Time needed to parse single-page HTML specification | 0.5 seconds | 2.7 seconds† | 6.0 seconds‡ |
| Peak memory needed for same | 11.6 MB | 38 MB | 13.9 MB |
\* For example: `<svg xmlns:xlink='http://www.w3.org/1999/xhtml' xlink:href='http://example.com/'/>`. It is unclear what correct behaviour is, but we believe our behaviour to be more consistent with the intent of the specification.
† With HTML namespace disabled. With HTML namespace enabled it does not finish in a reasonable time due to a PHP bug.
‡ With parse errors suppressed. Reporting parse errors adds approximately 10% overhead.
Modern DOM library written in PHP for HTML documents.

43
RoboFile.php

@ -21,47 +21,6 @@ function norm(string $path): string {
}
class RoboFile extends \Robo\Tasks {
/** Generates static manual pages in the "manual" directory
*
* The resultant files are suitable for offline viewing and inclusion into release builds
*/
public function manual(array $args): Result {
$execpath = escapeshellarg(norm(BASE."vendor/bin/daux"));
$t = $this->collectionBuilder();
$t->taskExec($execpath)->arg("generate")->option("-d", BASE."manual")->args($args);
return $t->run();
}
/** Serves a live view of the manual using the built-in Web server */
public function manualLive(array $args): Result {
$execpath = escapeshellarg(norm(BASE."vendor/bin/daux"));
return $this->taskExec($execpath)->arg("serve")->args($args)->run();
}
/** Rebuilds the entire manual theme
*
* This requires Node and Yarn to be installed, and only needs to be done when
* Daux's theme changes
*/
public function manualTheme(array $args): Result {
$postcss = escapeshellarg(norm(BASE."node_modules/.bin/postcss"));
$themesrc = norm(BASE."docs/theme/src/").\DIRECTORY_SEPARATOR;
$themeout = norm(BASE."docs/theme/php/").\DIRECTORY_SEPARATOR;
$dauxjs = norm(BASE."vendor/daux/vendor/daux/daux.io/themes/daux/js/").\DIRECTORY_SEPARATOR;
// start a collection; this stops after the first failure
$t = $this->collectionBuilder();
// install dependencies via Yarn
$t->taskExec("yarn install");
// compile the stylesheet
$t->taskExec($postcss)->arg($themesrc."php.scss")->option("-o", $themeout."php.css");
// copy JavaScript files from the Daux theme
foreach (glob($dauxjs."daux*.js") as $file) {
$t->taskFilesystemStack()->copy($file, $themeout.basename($file), true);
}
// execute the collection
return $t->run();
}
/** Runs the typical test suite
*
* Arguments passed to the task are passed on to PHPUnit. Thus one may, for
@ -204,7 +163,7 @@ class RoboFile extends \Robo\Tasks {
$template = <<<'FILE'
<?php
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// This file is machine-generated
// DO NOT MODIFY

25
composer.json

@ -1,15 +1,10 @@
{
"name": "mensbeam/html",
"description": "Parses modern HTML text into a PHP DOMDocument",
"name": "mensbeam/html-dom",
"description": "Modern DOM library written in PHP for HTML documents",
"type": "library",
"require": {
"php": ">=7.1",
"ext-dom": "*",
"mensbeam/intl": ">=0.9.0",
"mensbeam/mimesniff": "^0.2.0"
},
"suggest": {
"ext-ctype": "Improved performance"
"ext-dom": "*"
},
"scripts": {
"post-install-cmd": ["@composer bin all install"],
@ -30,24 +25,20 @@
],
"autoload": {
"psr-4": {
"MensBeam\\HTML\\": [
"MensBeam\\HTML\\DOM\\": [
"lib/",
"lib/DOM",
"lib/DOM/traits"
"lib/traits"
]
},
"classmap": ["lib/Token.php"],
"files": ["lib/ctype.php"]
}
},
"autoload-dev": {
"psr-4": {
"MensBeam\\HTML\\Test\\": "tests/lib/",
"MensBeam\\HTML\\TestCase\\": "tests/cases/"
"MensBeam\\HTML\\DOM\\Test\\": "tests/lib/",
"MensBeam\\HTML\\DOM\\TestCase\\": "tests/cases/"
}
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"masterminds/html5": "^2.7",
"daux/daux.io": "^0.16.0"
}
}

286
composer.lock

@ -4,168 +4,8 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "713eb048e9e334071336deca47de7e87",
"packages": [
{
"name": "mensbeam/intl",
"version": "0.9.0",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/intl.git",
"reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/intl/zipball/de037b182ce99aaa90ebc09b0ee0457ddf1d07bc",
"reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "*",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Intl\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "A set of dependency-free basic internationalization tools",
"keywords": [
"WHATWG",
"charset",
"encoding",
"internationalization",
"intl",
"unicode",
"utf-8",
"utf8"
],
"support": {
"issues": "https://github.com/mensbeam/intl/issues",
"source": "https://github.com/mensbeam/intl/tree/0.9.0"
},
"time": "2021-03-25T19:08:04+00:00"
},
{
"name": "mensbeam/mimesniff",
"version": "0.2.1",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/mime.git",
"reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/mime/zipball/c19be2496ab1e27fbf9c3483c2a9faa2781796cd",
"reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd",
"shasum": ""
},
"require": {
"php": ">=7.1",
"psr/http-message": "^1.0"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Mime\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "An implementation of the WHATWG MIME Sniffing specification",
"keywords": [
"WHATWG",
"mime",
"mimesniff"
],
"support": {
"issues": "https://github.com/mensbeam/mime/issues",
"source": "https://github.com/mensbeam/mime/tree/0.2.1"
},
"time": "2021-03-07T03:58:00+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interface for HTTP messages",
"homepage": "https://github.com/php-fig/http-message",
"keywords": [
"http",
"http-message",
"psr",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-message/tree/master"
},
"time": "2016-08-06T14:39:51+00:00"
}
],
"content-hash": "0e733e74b1b163aa4cd80329ff9c71d0",
"packages": [],
"packages-dev": [
{
"name": "bamarni/composer-bin-plugin",
@ -694,75 +534,6 @@
},
"time": "2020-12-25T05:00:37+00:00"
},
{
"name": "masterminds/html5",
"version": "2.7.5",
"source": {
"type": "git",
"url": "https://github.com/Masterminds/html5-php.git",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab",
"shasum": ""
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"HTML5",
"dom",
"html",
"parser",
"querypath",
"serializer",
"xml"
],
"support": {
"issues": "https://github.com/Masterminds/html5-php/issues",
"source": "https://github.com/Masterminds/html5-php/tree/2.7.5"
},
"time": "2021-07-01T14:25:37+00:00"
},
{
"name": "psr/container",
"version": "1.1.1",
@ -918,6 +689,59 @@
},
"time": "2019-04-30T12:38:16+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interface for HTTP messages",
"homepage": "https://github.com/php-fig/http-message",
"keywords": [
"http",
"http-message",
"psr",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-message/tree/master"
},
"time": "2016-08-06T14:39:51+00:00"
},
{
"name": "ralouphie/getallheaders",
"version": "3.0.3",

15
docs/config.json

@ -1,15 +0,0 @@
{
"title": "HTML",
"tagline": "Tools for parsing and printing HTML5 documents and fragments.",
"author": "Dustin Wilson",
"languages": {
"en": "English"
},
"themes_directory": "docs/theme",
"html": {
"theme":"php",
"float": false,
"toggle_code": false,
"search": false
}
}

1
docs/en/010_About.md

@ -1 +0,0 @@
HTML is a library which provides tools for parsing and printing of HTML5 documents and fragments. Unlike PHP's DOM and other similar libraries, the goal of the project is to parse HTML as accurately to the specification as possible given the limitations of PHP's DOM and of the uses of the library. Therefore, there is no scripting in this implementation, and there likely never will be.

8
docs/en/020_Installation.md

@ -1,8 +0,0 @@
We try to make the installation of the MensBeam HTML library as easy and straightforward as possible.
## Requirements ##
HTML intentionally has few requirements. It only requires PHP 7.1.0 or later with the [dom](http://php.net/manual/en/book.dom.php) extension installed. It is recommended to install the [ctype](https://www.php.net/manual/en/book.ctype.php) extension for performance improvements, but it is not required.
TODO: Add Installation instructions once there are releases and a package is available on Packagist.

63
docs/en/030_Document_Object_Model/010_Comment.md

@ -1,63 +0,0 @@
---
title: Comment
---
# The Comment Class #
## Introduction ##
<div class="admonition info"><p><strong>Info</strong> Only new methods and methods which make outward-facing changes from <a href="https://www.php.net/manual/en/class.domcomment.php">\DOMComment</a> will be documented here, otherwise they will be linked back to PHP's documentation.</p></div>
## Class Synopsis ##
<pre><code class="php">MensBeam\HTML\Comment extends <a href="https://www.php.net/manual/en/class.domcomment.php">\DOMComment</a> {
use <a href="../LeafNode/index.html">LeafNode</a>, <a href="../Moonwalk/index.html">Moonwalk</a>;
/* Inherited properties */
public string <a href="https://www.php.net/manual/en/class.domcharacterdata.php#domcharacterdata.props.data">$data</a> ;
public readonly int <a href="https://www.php.net/manual/en/class.domcharacterdata.php#domcharacterdata.props.length">$length</a> ;
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodename">$nodeName</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodevalue">$nodeValue</a> ;
public readonly int <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodetype">$nodeType</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.parentnode">$parentNode</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a> <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.childnodes">$childNodes</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.firstchild">$firstChild</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.lastchild">$lastChild</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.previoussibling">$previousSibling</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nextsibling">$nextSibling</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnamednodemap.php">\DOMNamedNodeMap</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.attributes">$attributes</a> ;
public readonly Document|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.ownerdocument">$ownerDocument</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.namespaceuri">$namespaceURI</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.prefix">$prefix</a> ;
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.localname">$localName</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.baseuri">$baseURI</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.textcontent">$textContent</a> ;
/* Trait Methods */
public <a href="../LeafNode/appendChild.html">LeafNode::appendChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node ) : DOMException
public <a href="../Node/C14N.html">Node::C14N</a> ( bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../Node/C14NFile.html">Node::C14NFile</a> ( string $uri , bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../LeafNode/insertBefore.html">LeafNode::insertBefore</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $child = null ) : DOMException
public <a href="../Moonwalk/moonwalk.html">Moonwalk::moonwalk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
public <a href="../LeafNode/removeChild.html">LeafNode::removeChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : DOMException
public <a href="../LeafNode/replaceChild.html">LeafNode::replaceChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : DOMException
/* Magic Methods */
public __toString() : string
/* Inherited Methods */
public <a href="https://www.php.net/manual/en/domcomment.construct.php">__construct</a> ( string $data = "" )
public <a href="https://www.php.net/manual/en/domnode.clonenode.php">\DOMNode::cloneNode</a> ( bool $deep = false ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domnode.getlineno.php">\DOMNode::getLineNo</a> ( ) : int
public <a href="https://www.php.net/manual/en/domnode.getnodepath.php">\DOMNode::getNodePath</a> ( ) : string|null
public <a href="https://www.php.net/manual/en/domnode.hasattributes.php">\DOMNode::hasAttributes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domnode.haschildnodes.php">\DOMNode::hasChildNodes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domnode.isdefaultnamespace.php">\DOMNode::isDefaultNamespace</a> ( string $namespace ) : bool
public <a href="https://www.php.net/manual/en/domnode.issamenode.php">\DOMNode::isSameNode</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $otherNode ) : bool
public <a href="https://www.php.net/manual/en/domnode.issupported.php">\DOMNode::isSupported</a> ( string $feature , string $version ) : bool
public <a href="https://www.php.net/manual/en/domnode.lookupnamespaceuri.php">\DOMNode::lookupNamespaceUri</a> ( string $prefix ) : string
public <a href="https://www.php.net/manual/en/domnode.lookupprefix.php">\DOMNode::lookupPrefix</a> ( string $namespace ) : string|null
public <a href="https://www.php.net/manual/en/domnode.normalize.php">\DOMNode::normalize</a> ( ) : void
}</code></pre>

28
docs/en/030_Document_Object_Model/010_Document/010_construct.md

@ -1,28 +0,0 @@
---
title: Document::__construct
---
Document::__construct — Creates a new Document object
## Description ##
```php
public Document::__construct ( )
```
Creates a new Document object.
## Examples ##
**Example \#1 Creating a new Document**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
echo $dom;
?>
```

13
docs/en/030_Document_Object_Model/010_Document/020_createEntityReference.md

@ -1,13 +0,0 @@
---
title: Document::createEntityReference
---
Document::createEntityReference — **DISABLED**
## Description ##
```php
public Document::createEntityReference ( string $name ) : false
```
This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php). DOM4 does not have entity references or entity nodes.

46
docs/en/030_Document_Object_Model/010_Document/020_load.md

@ -1,46 +0,0 @@
---
title: Document::load
---
Document::load — Load HTML from a file
## Description ##
```php
public Document::load ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
```
Loads an HTML document from a file.
## Parameters ##
<dl>
<dt><code>filename</code></dt>
<dd>The path to the HTML document.</dd>
<dt><code>options</code></dt>
<dd>Always <code>null</code>. Was used for option constants in <a href="https://www.php.net/manual/en/class.domdocument.php"><code>\DOMDocument</code></a>.</dd>
<dt><code>encodingOrContentType</code></dt>
<dd>The encoding of the document that is being loaded. If not specified it will be determined automatically.</dd>
</dl>
## Return Values ##
Returns <code>true</code> on success or <code>false</code> on failure.
## Examples ##
**Example \#1 Creating a Document**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->load('ook.html');
echo $dom;
?>
```

46
docs/en/030_Document_Object_Model/010_Document/020_loadHTML.md

@ -1,46 +0,0 @@
---
title: Document::loadHTML
---
Document::loadHTML — Load HTML from a string
## Description ##
```php
public Document::loadHTML ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool
```
The function parses the HTML contained in the string <var>source</var>.
## Parameters ##
<dl>
<dt><code>source</code></dt>
<dd>The HTML string.</dd>
<dt><code>options</code></dt>
<dd>Always <code>null</code>. Was used for option constants in <a href="https://www.php.net/manual/en/class.domdocument.php"><code>\DOMDocument</code></a>.</dd>
<dt><code>encodingOrContentType</code></dt>
<dd>The encoding of the document that is being loaded. If not specified it will be determined automatically.</dd>
</dl>
## Return Values ##
Returns <code>true</code> on success or <code>false</code> on failure.
## Examples ##
**Example \#1 Creating a Document**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->loadHTML('<!DOCTYPE html><html><head><title>Ook!</title></head><body><h1>Eek</h1></body></html>');
echo $dom;
?>
```

9
docs/en/030_Document_Object_Model/010_Document/020_loadHTMLFile.md

@ -1,9 +0,0 @@
---
title: Document::loadHTMLFile
---
Document::loadHTMLFile — Alias of <a href="Document_load.html"><code>Document::load()</code></a>
## Description ##
This function is an alias of <a href="Document_load.html"><code>Document::load()</code></a>.

13
docs/en/030_Document_Object_Model/010_Document/020_loadXML.md

@ -1,13 +0,0 @@
---
title: Document::loadXML
---
Document::loadXML — **DISABLED**
## Description ##
```php
public Document::loadXML ( string $source , null $options = null ) : false
```
This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php).

43
docs/en/030_Document_Object_Model/010_Document/020_save.md

@ -1,43 +0,0 @@
---
title: Document::save
---
Document::save — Serializes the DOM tree into a file
## Description ##
```php
public Document::save ( string $filename , null $options = null ) : int|false
```
Creates an HTML document from the DOM representation.
## Parameters ##
<dl>
<dt><code>filename</code></dt>
<dd>The path to the saved HTML document</dd>
<dt><code>options</code></dt>
<dd>Always <code>null</code>. Was used for option constants in <a href="https://www.php.net/manual/en/class.domdocument.php"><code>\DOMDocument</code></a>.</dd>
</dl>
## Return Values ##
Returns the number of bytes written or <code>false</code> on failure.
## Examples ##
**Example \#1 Saving a DOM tree into a file**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->loadHTML('<!DOCTYPE html><html><head><title>Ook!</title></head><body><h1>Eek</h1></body></html>');
echo 'Wrote: ' . $dom->save('/tmp/test.html') . ' bytes'; // Wrote: 85 bytes
?>
```

9
docs/en/030_Document_Object_Model/010_Document/020_saveHTMLFile.md

@ -1,9 +0,0 @@
---
title: Document::saveHTMLFile
---
Document::saveHTMLFile — Alias of <a href="Document_save.html"><code>Document::save()</code></a>
## Description ##
This function is an alias of <a href="Document_save.html"><code>Document::save()</code></a>.

13
docs/en/030_Document_Object_Model/010_Document/020_saveXML.md

@ -1,13 +0,0 @@
---
title: Document::saveXML
---
Document::saveXML — **DISABLED**
## Description ##
```php
public Document::saveXML ( DOMNode|null $node = null , null $options = null ) : false
```
This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php).

13
docs/en/030_Document_Object_Model/010_Document/020_validate.md

@ -1,13 +0,0 @@
---
title: Document::validate
---
Document::validate — **DISABLED**
## Description ##
```php
public Document::validate ( ) : true
```
This function has been disabled and will always return `true`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php).

13
docs/en/030_Document_Object_Model/010_Document/020_xinclude.md

@ -1,13 +0,0 @@
---
title: Document::xinclude
---
Document::xinclude — **DISABLED**
## Description ##
```php
public Document::xinclude ( null $options = null ) : false
```
This function has been disabled and will always return `false`. Documented to show difference from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php).

147
docs/en/030_Document_Object_Model/010_Document/index.md

@ -1,147 +0,0 @@
---
title: Document
---
# The Document Class #
## Introduction ##
Represents an entire HTML document; serves as the root of the document tree. Unlike the PHP [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php) class from which it inherits, it cannot be used to represent an XML document. It is strictly used to represent HTML.
<div class="admonition"><p><strong>Note:</strong> Only new methods and methods which make outward-facing changes from <a href="https://www.php.net/manual/en/class.domdocument.php">\DOMDocument</a> will be documented here, otherwise they will be linked back to PHP's documentation.</p></div>
## Class Synopsis ##
<pre><code class="php">MensBeam\HTML\Document extends <a href="https://www.php.net/manual/en/class.domdocument.php">\DOMDocument</a> {
use <a href="../ContainerNode/index.html">ContainerNode</a>, <a href="../Walk/index.html">Walk</a>;
/* Constants */
public const NO_QUIRKS_MODE = 0 ;
public const QUIRKS_MODE = 1 ;
public const LIMITED_QUIRKS_MODE = 2 ;
/* Properties */
public <a href="../Element/index.html">Element</a>|null <a href="#document-props-body">$body</a> = null ;
public string|null <a href="#document-props-documentencoding">$documentEncoding</a> = null ;
public int <a href="#document-props-quirksmode">$quirksMode</a> = 0 ;
/* Inherited properties */
public readonly <a href="https://www.php.net/manual/en/class.domnamednodemap.php">\DOMNamedNodeMap</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.attributes">$attributes</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.baseuri">$baseURI</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a> <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.childnodes">$childNodes</a> ;
public readonly DocumentType <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.doctype">$doctype</a> ;
public readonly Element <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.documentelement">$documentElement</a> ;
public string|null <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.documenturi">$documentURI</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.firstchild">$firstChild</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domimplementation.php">\DOMImplementation</a> <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.implementation">$implementation</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.lastchild">$lastChild</a> ;
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.localname">$localName</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.namespaceuri">$namespaceURI</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nextsibling">$nextSibling</a> ;
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodename">$nodeName</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodevalue">$nodeValue</a> ;
public readonly int <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodetype">$nodeType</a> ;
public readonly Document|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.ownerdocument">$ownerDocument</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.parentnode">$parentNode</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.prefix">$prefix</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.previoussibling">$previousSibling</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.textcontent">$textContent</a> ;
/* Methods */
public <a href="construct.html">__construct</a> ( )
public <a href="createEntityReference.html">createEntityReference</a> ( string $name ) : false
public <a href="load.html">load</a> ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public <a href="loadHTML.html">loadHTML</a> ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool
public <a href="loadHTMLFile.html">loadHTMLFile</a> ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public <a href="loadXML.html">loadXML</a> ( string $source , null $options = null ) : false
public <a href="save.html">save</a> ( string $filename , null $options = null ) : int|false
public <a href="saveHTMLFile.html">saveHTMLFile</a> ( string $filename , null $options = null ) : int|false
public <a href="saveXML.html">saveXML</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $node = null , null $options = null ) : false
public <a href="validate.html">validate</a> ( ) : true
public <a href="xinclude.html">xinclude</a> ( null $options = null ) : false
/* Trait Methods */
public <a href="../ContainerNode/appendChild.html">ContainerNode::appendChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="../Node/C14N.html">Node::C14N</a> ( bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../Node/C14NFile.html">Node::C14NFile</a> ( string $uri , bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../ContainerNode/insertBefore.html">ContainerNode::insertBefore</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $child = null ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="../Walk/walk.html">Walk::walk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
/* Magic Methods */
public __toString() : string
/* Inherited methods */
public <a href="https://www.php.net/manual/en/domnode.clonenode.php">\DOMNode::cloneNode</a> ( bool $deep = false ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domdocument.createattribute.php">\DOMDocument::createAttribute</a> ( string $localName ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|false
public <a href="https://www.php.net/manual/en/domdocument.createattributens.php">\DOMDocument::createAttributeNS</a> ( string|null $namespace , string $qualifiedName ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|false
public <a href="https://www.php.net/manual/en/domdocument.createcdatasection.php">\DOMDocument::createCDATASection</a> ( string $data ) : <a href="https://www.php.net/manual/en/class.domcdatasection.php">\DOMCdataSection</a>|false
public <a href="https://www.php.net/manual/en/domdocument.createcomment.php">\DOMDocument::createComment</a> ( string $data ) : Comment|false
public <a href="https://www.php.net/manual/en/domdocument.createdocumentfragment.php">\DOMDocument::createDocumentFragment</a> ( ) : DocumentFragment|false
public <a href="https://www.php.net/manual/en/domdocument.createelement.php">\DOMDocument::createElement</a> ( string $localName , string $value = "" ) : Element|false
public <a href="https://www.php.net/manual/en/domdocument.createelementns.php">\DOMDocument::createElementNS</a> ( string|null $namespace , string $qualifiedName , string $value = "" ) : Element|false
public <a href="https://www.php.net/manual/en/domdocument.createprocessinginstruction.php">\DOMDocument::createProcessingInstruction</a> ( string $target , string $data = "" ) : ProcessingInstruction|false
public <a href="https://www.php.net/manual/en/domdocument.createtextnode.php">\DOMDocument::createTextNode</a> ( string $data ) : Text|false
public <a href="https://www.php.net/manual/en/domdocument.getelementbyid.php">\DOMDocument::getElementById</a> ( string $elementId ) : Element|null
public <a href="https://www.php.net/manual/en/domdocument.getelementsbytagname.php">\DOMDocument::getElementsByTagName</a> ( string $qualifiedName ) : <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a>
public <a href="https://www.php.net/manual/en/domdocument.getelementsbytagnamens.php">\DOMDocument::getElementsByTagNameNS</a> ( string $namespace , string $localName ) : <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a>
public <a href="https://www.php.net/manual/en/domnode.getlineno.php">\DOMNode::getLineNo</a> ( ) : int
public <a href="https://www.php.net/manual/en/domnode.getnodepath.php">\DOMNode::getNodePath</a> ( ) : string|null
public <a href="https://www.php.net/manual/en/domnode.hasattributes.php">\DOMNode::hasAttributes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domnode.haschildnodes.php">\DOMNode::hasChildNodes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domdocument.importnode.php">\DOMDocument::importNode</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , bool $deep = false ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domnode.isdefaultnamespace.php">\DOMNode::isDefaultNamespace</a> ( string $namespace ) : bool
public <a href="https://www.php.net/manual/en/domnode.issamenode.php">\DOMNode::isSameNode</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $otherNode ) : bool
public <a href="https://www.php.net/manual/en/domnode.issupported.php">\DOMNode::isSupported</a> ( string $feature , string $version ) : bool
public <a href="https://www.php.net/manual/en/domnode.lookupnamespaceuri.php">\DOMNode::lookupNamespaceUri</a> ( string $prefix ) : string
public <a href="https://www.php.net/manual/en/domnode.lookupprefix.php">\DOMNode::lookupPrefix</a> ( string $namespace ) : string|null
public <a href="https://www.php.net/manual/en/domnode.normalize.php">\DOMNode::normalize</a> ( ) : void
public <a href="https://www.php.net/manual/en/domdocument.normalizedocument.php">\DOMDocument::normalizeDocument</a> ( ) : void
public <a href="https://www.php.net/manual/en/domdocument.registernodeclass.php">\DOMDocument::registerNodeClass</a> ( string $baseClass , string|null $extendedClass ) : bool
public <a href="https://www.php.net/manual/en/domdocument.relaxngvalidate.php">\DOMDocument::relaxNGValidate</a> ( string $filename ) : bool
public <a href="https://www.php.net/manual/en/domdocument.relaxngvalidatesource.php">\DOMDocument::relaxNGValidateSource</a> ( string $source ) : bool
public <a href="https://www.php.net/manual/en/domnode.removechild.php">\DOMNode::removeChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domnode.replacechild.php">\DOMNode::replaceChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domdocument.savehtml.php">\DOMDocument::saveHTML</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $node = null ) : string|false
public <a href="https://www.php.net/manual/en/domdocument.schemavalidate.php">\DOMDocument::schemaValidate</a> ( string $filename , int $flags = 0 ) : bool
public <a href="https://www.php.net/manual/en/domdocument.schemavalidatesource.php">\DOMDocument::schemaValidateSource</a> ( string $source , int $flags = 0 ) : bool
}</code></pre>
## Constants ##
| Constant | Value | Description |
| ----------------------------------------------------- | ----- | ------------------------------------- |
| <var>MensBeam\HTML\Document::NO_QUIRKS_MODE</var> | 0 | Document not in quirks mode |
| <var>MensBeam\HTML\Document::QUIRKS_MODE</var> | 1 | Document is in quirks mode |
| <var>MensBeam\HTML\Document::LIMITED_QUIRKS_MODE</var> | 2 | Document is in limited quirks mode |
## Properties ##
<dl>
<dt id="document-props-body"><var>body</var></dt>
<dd>Represents the <code>body</code> or <code>frameset</code> node of the current document, or <code>null</code> if no such element exists.</dd>
<dt id="document-props-documentencoding"><var>documentEncoding</var></dt>
<dd>Encoding of the document, as specified when parsing or when determining encoding type. Use this instead of <a href="https://php.net/manual/en/class.domdocument.php#domdocument.props.encoding"><code>\DOMDocument::encoding</code></a>.</dd>
<dt id="document-props-quirksmode"><var>quirksMode</var></dt>
<dd>Used when parsing. Specifies which mode the document was parsed in. One of the <a href="#page_Constants">predefined quirks mode constants</a>.</dd>
</dl>
The following properties inherited from [`\DOMDocument`](https://www.php.net/manual/en/class.domdocument.php) have no effect in `MensBeam\HTML\Document` and are therefore not listed in the class synopsis above:
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.actualencoding"><var>actualEncoding</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.config"><var>config</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.encoding"><var>encoding</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput"><var>formatOutput</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.preservewhitespace"><var>preserveWhiteSpace</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.recover"><var>recover</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.resolveexternals"><var>resolveExternals</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.standalone"><var>standalone</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.stricterrorchecking"><var>strictErrorChecking</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.substituteentities"><var>substituteEntities</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.validateonparse"><var>validateOnParse</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.version"><var>version</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.xmlencoding"><var>xmlEncoding</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.xmlstandalone"><var>xmlStandalone</var></a>
* <a href="https://www.php.net/manual/en/class.domdocument.php#domdocument.props.xmlversion"><var>xmlVersion</var></a>

24
docs/en/030_Document_Object_Model/010_Element/010_getAttribute.md

@ -1,24 +0,0 @@
---
title: Element::getAttribute
---
Element::getAttribute — Returns value of attribute
## Description ##
```php
public Element::getAttribute ( string $qualifiedName ) : string|null
```
Gets the value of the attribute with name `qualifiedName` for the current node.
## Parameters ##
<dl>
<dt><code>qualifiedName</code></dt>
<dd>The name of the attribute.</dd>
</dl>
## Return Values ##
Returns a string on success or <code>null</code> if no attribute with the given `qualifiedName` is found. `\DOMElement::getAttribute` returns an empty string on failure which is incorrect in newer versions of the DOM.

26
docs/en/030_Document_Object_Model/010_Element/010_getAttributeNS.md

@ -1,26 +0,0 @@
---
title: Element::getAttributeNS
---
Element::getAttributeNS — Returns value of attribute
## Description ##
```php
public Element::getAttributeNS ( string|null $namespace , string $localName ) : string|null
```
Gets the value of the attribute in namespace `namespace` with local name `localName` for the current node.
## Parameters ##
<dl>
<dt><code>namespace</code></dt>
<dd>The namespace URI.</dd>
<dt><code>localName</code></dt>
<dd>The local name of the attribute.</dd>
</dl>
## Return Values ##
Returns a string on success or <code>null</code> if no attribute with the given `localName` and `namespace` is found. `\DOMElement::getAttributeNS` returns an empty string on failure which is incorrect in newer versions of the DOM.

100
docs/en/030_Document_Object_Model/010_Element/index.md

@ -1,100 +0,0 @@
---
title: Element
---
# The Element Class #
## Introduction ##
<div class="admonition"><p><strong>Note:</strong> Only new methods and methods which make outward-facing changes from <a href="https://www.php.net/manual/en/class.domelement.php">\DOMElement</a> will be documented here, otherwise they will be linked back to PHP's documentation.</p></div>
## Class Synopsis ##
<pre><code class="php">MensBeam\HTML\Element extends <a href="https://www.php.net/manual/en/class.domelement.php">\DOMElement</a> {
use <a href="../ContainerNode/index.html">ContainerNode</a>, <a href="../Moonwalk/index.html">Moonwalk</a>, <a href="../Walk/index.html">Walk</a>;
/* Properties */
public readonly TokenList|null <a href="#element-props-classlist">$classList</a> ;
public string <a href="#element-props-innerhtml">$innerHTML</a> ;
public string <a href="#element-props-outerhtml">$outerHTML</a> ;
/* Inherited properties */
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodename">$nodeName</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodevalue">$nodeValue</a> ;
public readonly int <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nodetype">$nodeType</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.parentnode">$parentNode</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a> <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.childnodes">$childNodes</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.firstchild">$firstChild</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.lastchild">$lastChild</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.previoussibling">$previousSibling</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.nextsibling">$nextSibling</a> ;
public readonly <a href="https://www.php.net/manual/en/class.domnamednodemap.php">\DOMNamedNodeMap</a>|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.attributes">$attributes</a> ;
public readonly Document|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.ownerdocument">$ownerDocument</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.namespaceuri">$namespaceURI</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.prefix">$prefix</a> ;
public readonly string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.localname">$localName</a> ;
public readonly string|null <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.baseuri">$baseURI</a> ;
public string <a href="https://www.php.net/manual/en/class.domnode.php#domnode.props.textcontent">$textContent</a> ;
/* Methods */
public <a href="getAttribute.html">getAttribute</a> ( string $qualifiedName ) : string|null
public <a href="getAttributeNS.html">getAttributeNS</a> ( string|null $namespace , string $localName ) : string|null
/* Trait Methods */
public <a href="../ContainerNode/appendChild.html">ContainerNode::appendChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="../Node/C14N.html">Node::C14N</a> ( bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../Node/C14NFile.html">Node::C14NFile</a> ( string $uri , bool $exclusive = false , bool $withComments = false , null $xpath = null , null $nsPrefixes = null ) : false
public <a href="../ContainerNode/insertBefore.html">ContainerNode::insertBefore</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $child = null ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="../Moonwalk/moonwalk.html">Moonwalk::moonwalk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
public <a href="../Walk/walk.html">Walk::walk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
/* Magic Methods */
public __toString() : string
/* Inherited Methods */
public <a href="https://www.php.net/manual/en/domelement.construct.php">__construct</a> ( string $qualifiedName , string|null $value = null , string $namespace = "" )
public <a href="https://www.php.net/manual/en/domnode.clonenode.php">\DOMNode::cloneNode</a> ( bool $deep = false ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domelement.getattributenode.php">\DOMElement::getAttributeNode</a> ( string $qualifiedName ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|false
public <a href="https://www.php.net/manual/en/domelement.getattributenodens.php">\DOMElement::getAttributeNodeNS</a> ( string|null $namespace , string $localName ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|null
public <a href="https://www.php.net/manual/en/domelement.getelementsbytagname.php">\DOMElement::getElementsByTagName</a> ( string $qualifiedName ) : <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a>
public <a href="https://www.php.net/manual/en/domelement.getelementsbytagnamens.php">\DOMElement::getElementsByTagNameNS</a> ( string $namespace , string $localName ) : <a href="https://www.php.net/manual/en/class.domnodelist.php">\DOMNodeList</a>
public <a href="https://www.php.net/manual/en/domnode.getlineno.php">\DOMNode::getLineNo</a> ( ) : int
public <a href="https://www.php.net/manual/en/domnode.getnodepath.php">\DOMNode::getNodePath</a> ( ) : string|null
public <a href="https://www.php.net/manual/en/domelement.hasattribute.php">\DOMElement::hasAttribute</a> ( string $qualifiedName ) : bool
public <a href="https://www.php.net/manual/en/domelement.hasattributens.php">\DOMElement::hasAttributeNS</a> ( string|null $namespace , string $localName ) : bool
public <a href="https://www.php.net/manual/en/domnode.hasattributes.php">\DOMNode::hasAttributes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domnode.haschildnodes.php">\DOMNode::hasChildNodes</a> ( ) : bool
public <a href="https://www.php.net/manual/en/domnode.isdefaultnamespace.php">\DOMNode::isDefaultNamespace</a> ( string $namespace ) : bool
public <a href="https://www.php.net/manual/en/domnode.issamenode.php">\DOMNode::isSameNode</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $otherNode ) : bool
public <a href="https://www.php.net/manual/en/domnode.issupported.php">\DOMNode::isSupported</a> ( string $feature , string $version ) : bool
public <a href="https://www.php.net/manual/en/domnode.lookupnamespaceuri.php">\DOMNode::lookupNamespaceUri</a> ( string $prefix ) : string
public <a href="https://www.php.net/manual/en/domnode.lookupprefix.php">\DOMNode::lookupPrefix</a> ( string $namespace ) : string|null
public <a href="https://www.php.net/manual/en/domnode.normalize.php">\DOMNode::normalize</a> ( ) : void
public <a href="https://www.php.net/manual/en/domelement.removeattribute.php">\DOMElement::removeAttribute</a> ( string $qualifiedName ) : bool
public <a href="https://www.php.net/manual/en/domelement.removeattributenode.php">\DOMElement::removeAttributeNode</a> ( <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a> $attr ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|false
public <a href="https://www.php.net/manual/en/domelement.removeattributens.php">\DOMElement::removeAttributeNS</a> ( string|null $namespace , string $localName ) : void
public <a href="https://www.php.net/manual/en/domelement.setattribute.php">\DOMElement::setAttribute</a> ( string $qualifiedName , string $value ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|bool
public <a href="https://www.php.net/manual/en/domnode.removechild.php">\DOMNode::removeChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domnode.replacechild.php">\DOMNode::replaceChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="https://www.php.net/manual/en/domelement.setattributenode.php">\DOMElement::setAttributeNode</a> ( <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a> $attr ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|null|false
public <a href="https://www.php.net/manual/en/domelement.setattributenodens.php">\DOMElement::setAttributeNodeNS</a> ( <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a> $attr ) : <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a>|null|false
public <a href="https://www.php.net/manual/en/domelement.setattributens.php">\DOMElement::setAttributeNS</a> ( string|null $namespace , string $qualifiedName , string $value ) : void
public <a href="https://www.php.net/manual/en/domelement.setidattribute.php">\DOMElement::setIdAttribute</a> ( string $qualifiedName , bool $isId ) : void
public <a href="https://www.php.net/manual/en/domelement.setidattributenode.php">\DOMElement::setIdAttributeNode</a> ( <a href="https://www.php.net/manual/en/class.domattr.php">\DOMAttr</a> $attr , bool $isId ) : void
public <a href="https://www.php.net/manual/en/domelement.setidattributens.php">\DOMElement::setIdAttributeNS</a> ( string $namespace , string $qualifiedName , bool $isId ) : void
}</code></pre>
## Properties ##
<dl>
<dt id="element-props-classlist"><var>classList</var></dt>
<dd>A live <a href="../TokenList/TokenList.html">TokenList</a> collection of the class attributes of the element. This can then be used to manipulate the class list.</dd>
<dt id="element-props-innerhtml"><var>innerHTML</var></dt>
<dd>Gets or sets the HTML or XML markup contained within the element.</dd>
<dt id="element-props-outerhtml"><var>outerHTML</var></dt>
<dd>Gets the serialized HTML fragment describing the element including its descendants. It can also be set to replace the element with nodes parsed from the given string.</dd>
</dl>

55
docs/en/030_Document_Object_Model/ContainerNode/010_appendChild.md

@ -1,55 +0,0 @@
---
title: ContainerNode::appendChild
---
ContainerNode::appendChild — Adds new child at the end of the children
## Description ##
```php
public ContainerNode::appendChild ( \DOMNode $node ) : \DOMNode|false
```
This function appends a child to an existing list of children or creates a new list of children. The child can be created with e.g. [`Document::createElement()`](https://www.php.net/manual/en/domdocument.createelement.php), [`Document::createTextNode()`](https://www.php.net/manual/en/domdocument.createtextnode.php) etc. or simply by using any other node.
When using an existing node it will be moved.
<div class="warning">
<p><strong>Warning</strong> Only the following node types may be appended to any node using <code>Node</code>, subject to hierarchy restrictions depending on the type of node being appended to:</p>
<ul>
<li><code>Comment</code></li>
<li><code>DocumentFragment</code></li>
<li><a href="https://www.php.net/manual/en/class.domdocumenttype.php"><code>\DOMDocumentType</code></a></li>
<li><code>Element</code></li>
<li><code>ProcessingInstruction</code></li>
<li><code>Text</code></li>
</ul>
<p>Note that <code>\DOMAttr</code> is missing from this list.</p>
</div>
## Parameters ##
<dl>
<dt><code>node</code></dt>
<dd>The new node.</dd>
</dl>
## Examples ##
**Example \#1 Adding a child to the body**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->loadHTML('<!DOCTYPE html><html><head><title>Ook!</title></head><body></body></html>');
$node = $dom->createElement('br');
$dom->body->appendChild($node);
?>
```

40
docs/en/030_Document_Object_Model/ContainerNode/010_insertBefore.md

@ -1,40 +0,0 @@
---
title: ContainerNode::insertBefore
---
ContainerNode::insertBefore — Adds a new child before a reference node
## Description ##
```php
public ContainerNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : \DOMNode|false
```
This function inserts a new node right before the reference node. If you plan to do further modifications on the appended child you must use the returned node.
When using an existing node it will be moved.
<div class="warning">
<p><strong>Warning</strong> Only the following node types may be inserted into any node using <code>Node</code>, subject to hierarchy restrictions depending on the type of node being inserted into:</p>
<ul>
<li><code>Comment</code></li>
<li><code>DocumentFragment</code></li>
<li><a href="https://www.php.net/manual/en/class.domdocumenttype.php"><code>\DOMDocumentType</code></a></li>
<li><code>Element</code></li>
<li><code>ProcessingInstruction</code></li>
<li><code>Text</code></li>
</ul>
<p>Note that <code>\DOMAttr</code> is missing from this list.</p>
</div>
## Parameters ##
<dl>
<dt><code>node</code></dt>
<dd>The new node.</dd>
<dt><code>child</code></dt>
<dd>The reference node. If not supplied, <code>node</code> is appended to the children.</dd>
</dl>

14
docs/en/030_Document_Object_Model/ContainerNode/index.md

@ -1,14 +0,0 @@
# The ContainerNode trait #
## Introduction ##
Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). This one implements improved DOM child insertion methods.
<pre><code class="php">trait MensBeam\HTML\ContainerNode {
use <a href="../Node/index.html">Node</a>;
public <a href="appendChild.html">appendChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
public <a href="insertBefore.html">insertBefore</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $child = null ) : <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|false
}</code></pre>

13
docs/en/030_Document_Object_Model/LeafNode/010_appendChild.md

@ -1,13 +0,0 @@
---
title: LeafNode::appendChild
---
LeafNode::appendChild — **DISABLED**
## Description ##
```php
public LeafNode::appendChild ( \DOMNode $node ) : DOMException
```
Throws a `DOMException` upon use.

13
docs/en/030_Document_Object_Model/LeafNode/010_insertBefore.md

@ -1,13 +0,0 @@
---
title: LeafNode::insertBefore
---
LeafNode::insertBefore — **DISABLED**
## Description ##
```php
public LeafNode::insertBefore ( \DOMNode $node , \DOMNode|null $child = null ) : DOMException
```
Throws a `DOMException` upon use.

13
docs/en/030_Document_Object_Model/LeafNode/010_removeChild.md

@ -1,13 +0,0 @@
---
title: LeafNode::removeChild
---
LeafNode::removeChild — **DISABLED**
## Description ##
```php
public LeafNode::removeChild ( \DOMNode $node ) : DOMException
```
Throws a `DOMException` upon use.

13
docs/en/030_Document_Object_Model/LeafNode/010_replaceChild.md

@ -1,13 +0,0 @@
---
title: LeafNode::replaceChild
---
LeafNode::replaceChild — **DISABLED**
## Description ##
```php
public LeafNode::replaceChild ( \DOMNode $node , \DOMNode $child ) : DOMException
```
Throws a `DOMException` upon use.

16
docs/en/030_Document_Object_Model/LeafNode/index.md

@ -1,16 +0,0 @@
# The LeafNode trait #
## Introduction ##
Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). This one disables all DOM child insertion methods.
<pre><code class="php">trait MensBeam\HTML\LeafNode {
use <a href="../Node/index.html">Node</a>;
public <a href="appendChild.html">appendChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node ) : DOMException
public <a href="insertBefore.html">insertBefore</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node , <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a>|null $child = null ) : DOMException
public <a href="removeChild.html">removeChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : DOMException
public <a href="replaceChild.html">replaceChild</a> ( <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $node, <a href="https://www.php.net/manual/en/class.domnode.php">\DOMNode</a> $child ) : DOMException
}</code></pre>

43
docs/en/030_Document_Object_Model/Moonwalk/010_moonwalk.md

@ -1,43 +0,0 @@
---
title: Moonwalk::moonwalk
---
Moonwalk::moonwalk — Output generator for walking up the DOM tree
## Description ##
<pre><code class="php">public Moonwalk::moonwalk ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
</code></pre>
Non-standard. Creates a [`\Generator`](https://www.php.net/manual/en/class.generator.php) object for walking up the DOM tree. This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker).
## Examples ##
**Example \#1 Print name of all ancestors of the H1 element**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->loadHTML('<!DOCTYPE html><html><head><title>Ook!</title></head><body><h1>Eek</h1></body></html>');
$h1 = $dom->getElementsByTagName('h1')->item(0);
// All ancestors will be elements so there's no reason to have a filter.
$tree = $h1->moonwalk();
foreach ($tree as $t) {
echo "{$t->nodeName}\n";
}
?>
```
The above example will output something similar to:
```php
body
html
```

11
docs/en/030_Document_Object_Model/Moonwalk/index.md

@ -1,11 +0,0 @@
# The Moonwalk trait #
## Introduction ##
Allows the extended PHP DOM classes to moonwalk up the DOM via a [`\Generator`](https://www.php.net/manual/en/class.generator.php). This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker).
<pre><code class="php">trait MensBeam\HTML\Moonwalk {
public <a href="moonwalk.html">moonwalk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
}</code></pre>

13
docs/en/030_Document_Object_Model/Node/010_C14N.md

@ -1,13 +0,0 @@
---
title: Node::C14N
---
Node::C14N — **DISABLED**
## Description ##
```php
public Node::C14N ( bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
```
This function has been disabled and will always return `false`. `\DOMNode::C14N` is an extremely slow and inefficient method to serialize DOM and should never be used.

13
docs/en/030_Document_Object_Model/Node/010_C14NFile.md

@ -1,13 +0,0 @@
---
title: Node::C14NFile
---
Node::C14NFile — **DISABLED**
## Description ##
```php
public Node::C14NFile ( string $uri , bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
```
This function has been disabled and will always return `false`. `\DOMNode::C14NFile` is an extremely slow and inefficient method to serialize DOM and should never be used.

12
docs/en/030_Document_Object_Model/Node/index.md

@ -1,12 +0,0 @@
# The Node trait #
## Introduction ##
Allows the extended PHP DOM classes to simulate inheriting from a theoretical extended [\DOMNode](https://www.php.net/manual/en/class.domnode.php). It is used to disable [C14N](C14N.html) and [C14NFile](C14NFile.html).
<pre><code class="php">trait MensBeam\HTML\Node {
public <a href="C14N.html">C14N</a> ( bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
public <a href="C14NFile.html">C14NFile</a> ( string $uri , bool $exclusive = false , bool $withComments = false , array|null $xpath = null , array|null $nsPrefixes = null ) : false
}</code></pre>

45
docs/en/030_Document_Object_Model/Walk/010_walk.md

@ -1,45 +0,0 @@
---
title: Walk::walk
---
Walk::walk — Output generator for walking down the DOM tree
## Description ##
<pre><code class="php">public Walk::walk ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
</code></pre>
Non-standard. Creates a [`\Generator`](https://www.php.net/manual/en/class.generator.php) object for walking down the DOM tree. This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker).
## Examples ##
**Example \#1 Print name of every Element**
```php
<?php
namespace MensBeam\HTML;
$dom = new Document();
$dom->loadHTML('<!DOCTYPE html><html><head><title>Ook!</title></head><body><h1>Eek</h1></body></html>');
$tree = $dom->walk(function($node) {
return ($node instanceof Element);
});
foreach ($tree as $t) {
echo "{$t->nodeName}\n";
}
?>
```
The above example will output something similar to:
```php
html
head
title
body
h1
```

11
docs/en/030_Document_Object_Model/Walk/index.md

@ -1,11 +0,0 @@
# The Walk trait #
## Introduction ##
Allows the extended PHP DOM classes to walk down the DOM via a [`\Generator`](https://www.php.net/manual/en/class.generator.php). This is in lieu of recreating the awful [DOM TreeWalker API](https://developer.mozilla.org/en-US/docs/Web/API/Treewalker).
<pre><code class="php">trait MensBeam\HTML\Walk {
public <a href="walk.html">walk</a> ( <a href="https://www.php.net/manual/en/class.closure.php">\Closure</a>|null $filter = null ) : <a href="https://www.php.net/manual/en/class.generator.php">\Generator</a>
}</code></pre>

1
docs/en/030_Document_Object_Model/index.md

@ -1 +0,0 @@
The MensBeam HTML library works by parsing HTML strings into PHP's existing XML DOM. It, however, has to force the antiquated PHP DOM extension into working properly with modern HTML DOM by extending many of the node types. The documentation below follows PHP's doc style guide as closely as possible. Each class should be listed separately in the menu under this section.

1
docs/index.md

@ -1 +0,0 @@
Welcome to the user manual for HTML. It is included with each copy of the software, and is also [available online](https://mensbeam.com/html/en/). Please select a language above.

9
docs/theme/php/config.json

@ -1,9 +0,0 @@
{
"favicon": "<theme_url>favicon.png",
"js": [
"<theme_url>daux.min.js"
],
"css": [
"<theme_url>php.css"
]
}

2
docs/theme/php/daux.min.js

@ -1,2 +0,0 @@
var e=document.querySelectorAll(".s-content pre"),t=document.querySelector(".CodeToggler"),n="daux_code_blocks_hidden";function a(t){for(var a=0;a<e.length;a++)e[a].classList.toggle("Hidden",t);try{localStorage.setItem(n,t)}catch(e){}}t&&(e.length?function(){var e=t.querySelector(".CodeToggler__button--main");e.addEventListener("change",(function(e){a(!e.target.checked)}),!1);var r=!1;try{"false"===(r=localStorage.getItem(n))?r=!1:"true"===r&&(r=!0),r&&(a(!!r),e.checked=!r)}catch(e){}}():t.classList.add("Hidden"));var r=document.querySelector(".Collapsible__trigger");if(r){var o=document.querySelector(".Collapsible__content");r.addEventListener("click",(function(e){o.classList.contains("Collapsible__content--open")?(o.style.height=0,o.classList.remove("Collapsible__content--open"),r.setAttribute("aria-expanded","false")):(r.setAttribute("aria-expanded","true"),o.style.transitionDuration="150ms",o.style.height="".concat(o.scrollHeight,"px"),o.classList.add("Collapsible__content--open"))}))}var l=document.querySelectorAll("pre > code:not(.hljs)");if(l.length){var i=document.getElementsByTagName("head")[0],c=document.createElement("script");c.type="text/javascript",c.async=!0,c.src="".concat(window.base_url,"daux_libraries/highlight.pack.js"),c.onload=function(e){[].forEach.call(l,window.hljs.highlightBlock)},i.appendChild(c)}function s(e){var t=void 0!==e.preventDefault;t&&e.preventDefault();var n=function(e){for(var t=e;(t=t.parentNode)&&9!==t.nodeType;)if(1===t.nodeType&&t.classList.contains("Nav__item"))return t;throw new Error("Could not find a NavItem...")}(e.target),a=n.querySelector("ul.Nav");t&&n.classList.contains("Nav__item--open")?(a.style.height="".concat(a.scrollHeight,"px"),a.style.transitionDuration="150ms",a.style.height="0px",n.classList.remove("Nav__item--open")):t?(a.style.transitionDuration="150ms",a.addEventListener("transitionend",(function 
e(t){"0px"!==t.target.style.height&&(t.target.style.height="auto"),t.target.removeEventListener("transitionend",e)})),a.style.height="".concat(a.scrollHeight,"px"),n.classList.add("Nav__item--open")):a.style.height="auto"}for(var d,u=document.querySelectorAll(".Nav__item.has-children i.Nav__arrow"),h=u.length-1;h>=0;h--)(d=u[h]).addEventListener("click",s),d.parentNode.parentNode.classList.contains("Nav__item--open")&&s({target:d});var g=document.querySelectorAll(".Nav__item__link--nopage"),v=!0,p=!1,_=void 0;try{for(var y,m=g[Symbol.iterator]();!(v=(y=m.next()).done);v=!0){y.value.addEventListener("click",s)}}catch(e){p=!0,_=e}finally{try{v||null==m.return||m.return()}finally{if(p)throw _}}
//# sourceMappingURL=daux.min.js.map

2
docs/theme/php/php.css

File diff suppressed because one or more lines are too long

324
docs/theme/src/php.scss

@ -1,324 +0,0 @@
/* Daux imports; fonts are omitted */
@import "../../../vendor/daux/daux.io/src/css/theme_daux/vendor/normalize.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_variables.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_mixins.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_structure.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_typography.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_components.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_homepage.scss";
@import "../../../vendor/daux/daux.io/src/css/theme_daux/_print.scss" print;
/* Overrides */
:root {
--font-family-text: sans-serif;
--font-family-monospace: "Operator Mono SSm", "Operator Mono", monospace;
--font-family-heading: sans-serif;
--type-size-1: 1.75rem;
--type-size-2: 1.5rem;
--type-size-3: 1.25rem;
--type-size-4: 1.125rem;
--type-size-5: 1rem;
--type-size-6: 1rem;
--purple: #4f5b93;
--tyrian: #793862;
--light-purple: #8892bf;
--lighter-purple: #c4c9df;
--danger: #f4dfdf;
--page: #f2f2f2;
--text: #333;
--red: #e63c2f;
--blue: #15284b;
--light-blue: #93b7bb;
--beige: #e8d5d3;
--green: #2c9a42;
--dark-gray: color(var(--page) blend(var(--text) 75%));
--gray: color(var(--page) blend(var(--text) 50%));
--light-gray: color(var(--page) blend(var(--text) 25%));
--lighter-gray: color(var(--page) blend(var(--text) 12.5%));
--lightest-gray: color(#fff blend(var(--page) 75%));
--dark: var(--text);
--light: var(--light-purple);
--sidebar-background: var(--text);
--sidebar-link-active-background: var(--tyrian);
--sidebar-link-color: var(--page);
--sidebar-link-secondary-color: var(--page);
--sidebar-collapsible--hamburger-color: var(--beige);
--link-color: #369;
--brand-color: #fff;
--brand-background: var(--purple);
--code-tag-background-color: transparent;
--code-tag-border-radius: 0;
--code-tag-box-shadow: none;
--homepage-navbar-background: var(--red);
--hero-button-block-background: var(--beige);
--homepage-hero-background: #fff;
--content-floating-blocks-background: var(--blue);
}
body {
line-height: 1.618;
font-size: 16px;
color: var(--text) !important;
}
body, .Columns__right__content {
background-color: var(--page);
}
a.Link--external::after {
content: '';
}
.Page__header h1 {
font-size: var(--type-size-6);
border-bottom: 0;
margin-bottom: 0;
}
.s-content {
h1, h2, h3, h4, h5, h6 {
margin-bottom: 1.5rem;
}
h1 {
font-size: var(--type-size-1);
}
h2 {
font-size: var(--type-size-2);
}
h3 {
font-size: var(--type-size-3);
}
h4 {
font-size: var(--type-size-4);
}
h5 {
font-size: var(--type-size-5);
}
h6 {
font-size: var(--type-size-6);
}
code {
padding-top: 0;
padding-bottom: 0;
padding: 0;
border: 0;
margin: 0;
&::before, &::after {
display: none;
}
pre & {
display: inline;
}
}
table {
border-collapse: separate;
border-spacing: 2px;
border: 2px solid var(--gray);
thead, tbody {
background-color: #fff;
}
tr {
border-top: 0;
&:nth-child(2n) {
background-color: transparent;
td {
background-color: #fff;
}
}
}
th, td {
border: 0;
}
th {
background-color: var(--lighter-purple);
}
}
}
.s-content table, .Nav__item .Nav__item {
font-size: 1rem;
}
.Brand, h1, h2, h3, h4, h5, h6 {
font-weight: 600;
font-stretch: condensed;
}
h1, h2, h3, h4, h5, h6 {
color: var(--tyrian);
border-bottom: 1px dotted var(--text);
padding-bottom: 5px;
}
.Button {
border-radius: 0;
}
.HomepageButtons .Button--hero {
font-weight: normal;
font-size: var(--type-size-6);
}
.Page__header {
border-bottom: 0;
}
.Pager li > a {
border: 2px solid var(--lighter-gray);
border-radius: 0;
&:hover, &:focus {
background-color: var(--lighter-gray);
}
}
.Pager--prev a::before {
content: "\2190\00a0";
}
.Pager--next a::after {
content: "\00a0\2192";
}
.Navbar {
height: auto;
box-shadow: none;
.Brand {
float: none;
line-height: inherit;
height: auto;
}
}
.Homepage {
padding-top: 10px !important;
}
.Nav__item {
font-size: var(--type-size-6);
}
.Nav .Nav .Nav__item a {
padding-left: 35px;
}
.Nav__arrow:before {
margin: 0 0 0 -.25em;
top: auto;
bottom: calc(50% - 0.0625em);
width: 0.375em;
height: 0.375em;
transform-origin: center;
}
.Nav__arrow:before, .Nav .Nav .Nav__item a .Nav__arrow:before {
border-right-color: var(--page);
border-top-color: var(--page);
}
.admonition {
padding: 0.75rem;
margin: 1.5rem 0;
border: 1px solid var(--light-gray);
background-color: #fff;
p:last-child {
margin-bottom: 0;
}
.danger {
background-color: var(--danger);
border-color: color(var(--danger) blend(var(--text) 25%));
}
}
.hljs, .s-content pre {
background: var(--blue);
color: var(--beige);
}
.hljs {
display: block;
overflow-x: auto;
padding: 0.5em;
}
.hljs-emphasis {
font-style: italic;
}
.hljs-strong {
font-weight: bold;
}
.hljs-comment, .hljs-quote {
color: #978e9c;
}
/* Green */
.hljs-keyword, .hljs-selector-tag, .hljs-addition {
color: #acb39a;
}
/* Cyan */
.hljs-number, .hljs-string, .hljs-meta .hljs-meta-string, .hljs-literal, .hljs-doctag, .hljs-regexp {
color: var(--light-blue);
}
/* Blue */
.hljs-title, .hljs-section, .hljs-name, .hljs-selector-id, .hljs-selector-class {
color: #82b7e5;
}
/* Yellow */
.hljs-attribute, .hljs-attr, .hljs-variable, .hljs-template-variable, .hljs-class .hljs-title, .hljs-type {
color: #c5b031;
}
/* Orange */
.hljs-symbol, .hljs-bullet, .hljs-subst, .hljs-meta, .hljs-meta .hljs-keyword, .hljs-selector-attr, .hljs-selector-pseudo, .hljs-link {
color: #ea8031;
}
/* Red */
.hljs-built_in, .hljs-deletion {
color: var(--red);
}
.hljs-formula {
background: #686986;
}
@media (--viewport-large) {
.Columns__left {
border: 0;
}
}

2
lib/DOM/AbstractDocument.php → lib/AbstractDocument.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// Exists so Document can extend methods from its traits.
abstract class AbstractDocument extends \DOMDocument {

184
lib/ActiveFormattingElementsList.php

@ -1,184 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
# 8.2.3.3. The list of active formatting elements
# Initially, the list of active formatting elements is empty. It is used to
# handle mis-nested formatting element tags.
#
# The list contains elements in the formatting category, and markers. The
# markers are inserted when entering applet, object, marquee, template, td, th,
# and caption elements, and are used to prevent formatting from "leaking" into
# applet, object, marquee, template, td, th, and caption elements.
#
# In addition, each element in the list of active formatting elements is
# associated with the token for which it was created, so that further elements
# can be created for that token if necessary.
/**
 * The parser's list of active formatting elements.
 *
 * Each entry is either an ActiveFormattingElementsMarker instance or an
 * array of the form ['token' => StartTagToken, 'element' => \DOMElement].
 *
 * NOTE(review): several methods rely on iterating $this yielding entries
 * from the most recently added to the oldest, with their storage index as
 * key; that behaviour comes from Stack and should be confirmed there.
 */
class ActiveFormattingElementsList extends Stack {
    protected $_storage = [];

    /**
     * Stores an entry at the given offset.
     *
     * Pushing an element entry onto a non-empty list (offset null or equal to
     * the current count) first evicts the earliest of three equivalent
     * entries found after the last marker; every other case is a plain
     * assignment.
     *
     * @param int|null $offset The position to write to, or null to append
     * @param ActiveFormattingElementsMarker|array $value A marker, or a ['token' => ..., 'element' => ...] pair
     */
    public function offsetSet($offset, $value) {
        $count = $this->count;
        assert($offset >= 0 && $offset <= $count, new Exception(Exception::STACK_INVALID_INDEX, $offset));
        assert($value instanceof ActiveFormattingElementsMarker || (
            is_array($value)
            && count($value) === 2
            && isset($value['token'])
            && isset($value['element'])
            && $value['token'] instanceof StartTagToken
            && $value['element'] instanceof \DOMElement
        ), new Exception(Exception::STACK_INVALID_VALUE));
        if ($value instanceof ActiveFormattingElementsMarker) {
            $this->_storage[$offset ?? $count] = $value;
        } elseif ($count && ($offset ?? $count) === $count) {
            # When the steps below require the UA to push onto the list of active formatting
            # elements an element element, the UA must perform the following steps:
            // First find the position of the last marker, if any
            $lastMarker = -1;
            foreach ($this as $pos => $item) {
                if ($item instanceof ActiveFormattingElementsMarker) {
                    $lastMarker = $pos;
                    break;
                }
            }
            # If there are already three elements in the list of active formatting
            # elements after the last marker, if any, or anywhere in the list if there are
            # no markers, that have the same tag name, namespace, and attributes as element,
            # then remove the earliest such element from the list of active formatting
            # elements.
            $pos = $count - 1;
            $matches = 0;
            if ($pos > $lastMarker) {
                do {
                    $matches += (int) $this->matchElement($value['element'], $this->_storage[$pos]['element']);
                    // Stop once there are three matches or the marker is reached.
                    // The short-circuit leaves $pos on the third (earliest) match.
                } while ($matches < 3 && (--$pos) > $lastMarker);
            }
            if ($matches === 3) {
                $this->offsetUnset($pos);
            }
            # Add element to the list of active formatting elements.
            $this->_storage[] = $value;
        } else {
            $this->_storage[$offset ?? $count] = $value;
        }
        $this->count = count($this->_storage);
    }

    /**
     * Tells whether two elements have the same tag name, namespace and
     * attribute set (names, namespaces and values; order is irrelevant).
     */
    protected function matchElement(\DOMElement $a, \DOMElement $b): bool {
        // Compare elements as part of pushing an element onto the stack
        # 1. If there are already three elements in the list of active formatting
        # elements after the last marker, if any, or anywhere in the list if there are
        # no markers, that have the same tag name, namespace, and attributes as element,
        # then remove the earliest such element from the list of active formatting
        # elements.
        # For these purposes, the attributes must be compared as they were
        # when the elements were created by the parser; two elements have the same
        # attributes if all their parsed attributes can be paired such that the two
        # attributes in each pair have identical names, namespaces, and values (the
        # order of the attributes does not matter).
        if (
            $a->nodeName !== $b->nodeName
            || $a->namespaceURI !== $b->namespaceURI
            || $a->attributes->length !== $b->attributes->length
        ) {
            return false;
        }
        foreach ($a->attributes as $attr) {
            // The *NS accessors take a local name; the qualified nodeName
            // (e.g. "xlink:href") would never match a prefixed attribute.
            $ns = $attr->namespaceURI;
            $local = $attr->localName;
            if (!$b->hasAttributeNS($ns, $local) || $b->getAttributeNS($ns, $local) !== $attr->value) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds an element entry, either by pushing it (null position, which
     * goes through offsetSet) or by splicing it in at the given position.
     *
     * @param StartTagToken $token The token the element was created for
     * @param \DOMElement $element The element itself
     * @param int|null $at The index to insert at, or null to push
     */
    public function insert(StartTagToken $token, \DOMElement $element, ?int $at = null): void {
        assert($at === null || ($at >= 0 && $at <= $this->count), new Exception(Exception::STACK_INVALID_INDEX, $at));
        if ($at === null) {
            $this[] = [
                'token' => $token,
                'element' => $element
            ];
        } else {
            array_splice($this->_storage, $at, 0, [[
                'token' => $token,
                'element' => $element,
            ]]);
            $this->count = count($this->_storage);
        }
    }

    /** Pushes a new marker onto the list. */
    public function insertMarker(): void {
        $this[] = new ActiveFormattingElementsMarker;
    }

    /** Pops entries until a marker has been popped or the list is empty. */
    public function clearToTheLastMarker(): void {
        # When the steps below require the UA to clear the list of active formatting
        # elements up to the last marker, the UA must perform the following steps:
        # 1. Let entry be the last (most recently added) entry in the list of active
        # formatting elements.
        # 2. Remove entry from the list of active formatting elements.
        # 3. If entry was a marker, then stop the algorithm at this point. The list has
        # been cleared up to the last marker.
        # 4. Go to step 1.
        while ($this->_storage) {
            $popped = array_pop($this->_storage);
            if ($popped instanceof ActiveFormattingElementsMarker) {
                break;
            }
        }
        $this->count = count($this->_storage);
    }

    /**
     * Finds the entry whose element is the very same node as the target.
     *
     * @return int The key of the entry, or -1 if there is none
     */
    public function findSame(\DOMElement $target): int {
        foreach ($this as $k => $entry) {
            if (!$entry instanceof ActiveFormattingElementsMarker && $entry['element']->isSameNode($target)) {
                return $k;
            }
        }
        return -1;
    }

    /**
     * Finds the first iterated entry whose element has one of the given
     * node names, giving up as soon as a marker is encountered.
     *
     * @param string ...$name The node names to look for
     * @return int The key of the entry, or -1 if a marker or the end is reached first
     */
    public function findToMarker(string ...$name): int {
        foreach ($this as $k => $entry) {
            if ($entry instanceof ActiveFormattingElementsMarker) {
                return -1;
            }
            // Strict comparison so numeric-looking names are not loosely equated
            if (in_array($entry['element']->nodeName, $name, true)) {
                return $k;
            }
        }
        return -1;
    }

    /** Removes the entry for the target element, if it is in the list. */
    public function removeSame(\DOMElement $target): void {
        $pos = $this->findSame($target);
        if ($pos > -1) {
            unset($this[$pos]);
        }
    }

    /**
     * Renders the list for debugging: markers as "|", elements as their
     * namespace prefix (when non-empty) followed by their node name.
     *
     * @codeCoverageIgnore
     */
    public function __toString(): string {
        $out = [];
        foreach ($this as $entry) {
            if ($entry instanceof ActiveFormattingElementsMarker) {
                $out[] = "|";
            } else {
                $node = $entry['element'];
                $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE;
                $prefix = Parser::NAMESPACE_MAP[$ns] ?? "?";
                $prefix .= $prefix ? " " : "";
                $out[] = $prefix.$node->nodeName;
            }
        }
        return implode(" - ", $out);
    }
}
/** Empty sentinel type: ActiveFormattingElementsList recognises its instances as markers via instanceof. */
class ActiveFormattingElementsMarker {
}

19
lib/CharacterReference.php

File diff suppressed because one or more lines are too long

432
lib/Charset.php

@ -1,432 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
use MensBeam\Intl\Encoding;
use MensBeam\Mime\MimeType;
abstract class Charset {
/**
 * Looks for a Unicode byte order mark at the very start of a byte stream.
 *
 * @param string $data The byte stream to inspect
 * @return string|null "UTF-8", "UTF-16BE" or "UTF-16LE" for the matching mark, or null when there is none
 */
public static function fromBOM(string $data): ?string {
    // The UTF-8 mark is three bytes long, so it is checked on its own
    if (substr($data, 0, 3) === "\u{FEFF}") {
        return "UTF-8";
    }
    // Both UTF-16 marks are two bytes long
    $lead = substr($data, 0, 2);
    if ($lead === "\xFE\xFF") {
        return "UTF-16BE";
    }
    if ($lead === "\xFF\xFE") {
        return "UTF-16LE";
    }
    return null;
}
/**
 * Resolves an encoding label (e.g. "utf-8") to the name reported for it
 * by Encoding::matchLabel().
 *
 * @param string $value The encoding label to match
 * @return string|null The matched name, or null when the label is not recognised
 */
public static function fromCharset(string $value): ?string {
    $match = Encoding::matchLabel($value);
    return $match ? $match['name'] : null;
}
/**
 * Reads the charset parameter of an HTTP Content-Type header-field and
 * resolves it to an encoding name.
 *
 * @param string $contentType The value of a Content-Type header-field
 * @return string|null The matched name, or null when the header cannot be parsed, has no charset parameter, or the label is not recognised
 */
public static function fromTransport(string $contentType): ?string {
    $mime = MimeType::parseBytes($contentType);
    // Nothing to resolve without a parsed type carrying a charset parameter
    if (!$mime || !isset($mime->params['charset'])) {
        return null;
    }
    $match = Encoding::matchLabel($mime->params['charset']);
    return $match ? $match['name'] : null;
}
/** Inspects the head of an HTML string to guess its encoding
*
* Only the first $endAfter bytes are scanned. Comments, non-meta tags and
* markup declarations are skipped; an encoding is only ever taken from the
* attributes of a meta tag. UTF-16 results are reported as UTF-8 and
* x-user-defined as windows-1252.
*
* @param string $data The HTML string to scan
* @param int $endAfter The number of bytes of the string to stop after
* @return string|null The encoding found, or null if the scanned bytes yield none
*/
public static function fromPrescan(string $data, int $endAfter = 1024): ?string {
# When an algorithm requires a user agent to prescan a byte stream to
# determine its encoding, given some defined end condition, then it
# must run the following steps.
# These steps either abort unsuccessfully or return a character
# encoding. If at any point during these steps (including during
# instances of the get an attribute algorithm invoked by this one)
# the user agent either runs out of bytes (meaning the position
# pointer created in the first step below goes beyond the end of the
# byte stream obtained so far) or reaches its end condition, then
# abort the prescan a byte stream to determine its encoding
# algorithm unsuccessfully.
$s = substr($data, 0, $endAfter);
// Clamp the limit to the number of bytes actually available
$endAfter = strlen($s);
# Let position be a pointer to a byte in the input byte stream,
# initially pointing at the first byte.
$pos = 0;
# Loop: If position points to:
while ($pos < $endAfter) {
// OPTIMIZATION: Start by skipping anything not a less-than sign
if (@$s[$pos] === "<") {
// From here on $pos points at the byte following the less-than sign
$pos++;
# A sequence of bytes starting with: 0x3C 0x21 0x2D 0x2D (`<!--`)
if (@$s[$pos] === "!" && @$s[$pos + 1] === "-" && @$s[$pos + 2] === "-") {
# Advance the position pointer so that it points at the
# first 0x3E byte which is preceded by two 0x2D bytes
# (i.e. at the end of an ASCII '-->' sequence) and
# comes after the 0x3C byte that was found. (The two
# 0x2D bytes can be the same as those in the '<!--'
# sequence.)
// An unterminated comment moves $pos past the limit, which ends the outer loop
$pos = (strpos($s, "-->", $pos) ?: $endAfter) + 3;
}
# A sequence of bytes starting with: 0x3C, 0x4D or 0x6D,
# 0x45 or 0x65, 0x54 or 0x74, 0x41 or 0x61, and one of
# 0x09, 0x0A, 0x0C, 0x0D, 0x20, 0x2F (case-insensitive
# ASCII '<meta' followed by a space or slash)
elseif (preg_match("<^meta[\x09\x0A\x0C\x0D /]$>i", substr($s, $pos, 5))) {
# Advance the position pointer so that it points at
# the next 0x09, 0x0A, 0x0C, 0x0D, 0x20, or 0x2F
# byte (the one in sequence of characters matched above).
$pos += 5;
# Let attribute list be an empty list of strings.
# Let got pragma be false.
# Let need pragma be null.
# Let charset be the null value (which, for the purposes
# of this algorithm, is distinct from an unrecognized
# encoding or the empty string).
$attrList = [];
$gotPragma = false;
$needPragma = null;
$charset = null;
# Attributes: Get an attribute and its value.
# If no attribute was sniffed, then jump to the processing step below.
// getAttribute() advances $pos by reference as it consumes bytes
while ($attr = self::getAttribute($s, $pos)) {
# If the attribute's name is already in attribute list,
# then return to the step labeled attributes.
if (isset($attrList[$attr['name']])) {
continue;
}
# Add the attribute's name to attribute list.
$attrList[$attr['name']] = true;
# Run the appropriate step from the following list, if one applies:
# If the attribute's name is "http-equiv"
if ($attr['name'] === "http-equiv") {
# If the attribute's value is "content-type", then set got pragma to true.
if ($attr['value'] === "content-type") {
$gotPragma = true;
}
}
# If the attribute's name is "content"
elseif ($attr['name'] === "content") {
# Apply the algorithm for extracting a character encoding from a meta
# element, giving the attribute's value as the string to parse.
# If a character encoding is returned, and if charset is still set to
# null, let charset be the encoding returned, and set need pragma to true.
// OPTIMIZATION: Check if charset is null before performing the algorithm
if ($charset === null && $candidate = self::fromMeta($attr['value'])) {
$charset = $candidate;
$needPragma = true;
}
}
# If the attribute's name is "charset"
elseif ($attr['name'] === "charset") {
# Let charset be the result of getting an encoding from the attribute's
# value, and set need pragma to false.
$candidate = self::fromCharset($attr['value']);
$charset = $candidate ?? false; // false signifies 'failure'
$needPragma = false;
}
}
# Processing: If need pragma is null, then jump to the step below labeled next byte.
# If need pragma is true but got pragma is false, then jump to the step below labeled next byte.
if ($needPragma === null || ($needPragma && !$gotPragma)) {
// $pos has already been advanced past "meta", so the outer loop simply resumes from it
continue;
}
# If charset is failure, then jump to the step below labeled next byte.
if ($charset === false) {
$pos++;
continue;
}
# If charset is a UTF-16 encoding, then set charset to UTF-8.
elseif ($charset === "UTF-16" || $charset === "UTF-16LE" || $charset === "UTF-16BE") {
$charset = "UTF-8";
}
# If charset is x-user-defined, then set charset to windows-1252.
elseif ($charset === "x-user-defined") {
$charset = "windows-1252";
}
# Abort the prescan a byte stream to determine its encoding algorithm,
# returning the encoding given by charset.
return $charset;
}
# A sequence of bytes starting with a 0x3C byte (<), optionally a 0x2F byte (/),
# and finally a byte in the range 0x41-0x5A or 0x61-0x7A (A-Z or a-z)
elseif ((@$s[$pos] === "/" && ctype_alpha(@$s[$pos + 1])) || (ctype_alpha(@$s[$pos]))) {
# Advance the position pointer so that it points at the next
# 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>) byte.
// The empty string in the list stops the scan when $pos runs off the end of $s
while (!in_array(@$s[++$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", ">", ""]));
# Repeatedly get an attribute until no further attributes can be found,
# then jump to the step below labeled next byte.
while(self::getAttribute($s, $pos));
}
# A sequence of bytes starting with: 0x3C 0x21 (`<!`)
# A sequence of bytes starting with: 0x3C 0x2F (`</`)
# A sequence of bytes starting with: 0x3C 0x3F (`<?`)
elseif (in_array(@$s[$pos], ["!", "/", "?"])) {
# Advance the position pointer so that it points at the first
# 0x3E byte (>) that comes after the 0x3C byte that was found.
$pos = (strpos($s, ">", $pos) ?: $endAfter) + 1;
}
}
# Any other byte
else {
# Do nothing with that byte.
$pos++;
}
}
// Ran out of bytes without finding a usable encoding declaration
return null;
}
/**
 * Scans one attribute during the encoding detection pre-scan.
 *
 * This follows the "get an attribute" steps quoted in the "#" comments
 * below. Attribute names and values are lowercased as they are collected.
 *
 * Bytes past the end of the input are read as the empty string by way of
 * the null-coalescing operator rather than the "@" error-control operator:
 * no warning is raised in the first place, so user-installed error handlers
 * are never invoked while scanning up to the end of the input.
 *
 * @param string $s The byte string being pre-scanned
 * @param int $pos The current position within $s; it is advanced in place
 * @return array An empty array when no attribute could be read (a ">" was
 *  found or the input ran out); otherwise an array with the string keys
 *  "name" and "value"
 */
protected static function getAttribute(string $s, &$pos): array {
    $whitespace = ["\x09", "\x0A", "\x0C", "\x0D", " "];
    # When the prescan a byte stream to determine its encoding
    # algorithm says to get an attribute, it means doing this:
    # If the byte at position is one of
    # 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP),
    # or 0x2F (/) then advance position to the next byte and
    # redo this step.
    while (in_array($s[$pos] ?? "", ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"], true)) {
        $pos++;
    }
    $char = $s[$pos] ?? "";
    # If the byte at position is 0x3E (>),
    # then abort the get an attribute algorithm. There isn't one.
    if ($char === ">") {
        return [];
    }
    # Otherwise, the byte at position is the start of the attribute name.
    # Let attribute name and attribute value be the empty string.
    $name = "";
    $value = "";
    # Process the byte at position as follows:
    while ($char !== "") {
        # If it is 0x3D (=), and the attribute name is longer than the empty string
        if ($char === "=" && $name !== "") {
            # Advance position to the next byte and jump to the step below labeled value.
            $pos++;
            goto value;
        }
        # If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), or 0x20 (SP)
        elseif (in_array($char, $whitespace, true)) {
            goto spaces;
        }
        # If it is 0x2F (/) or 0x3E (>)
        elseif ($char === "/" || $char === ">") {
            # Abort the get an attribute algorithm.
            # The attribute's name is the value of attribute name, its value is the empty string.
            return ['name' => $name, 'value' => $value];
        }
        # If it is in the range 0x41 (A) to 0x5A (Z)
        # Anything else
        else {
            # Append the code point with the same value as the byte at position to attribute name.
            # (It doesn't actually matter how bytes outside the ASCII range are handled here,
            # since only ASCII bytes can contribute to the detection of a character encoding.)
            // OPTIMIZATION: Also handle uppercase characters
            $name .= strtolower($char);
        }
        # Advance position to the next byte and return to the previous step.
        $pos++;
        $char = $s[$pos] ?? "";
    }
    // Out of bytes: the loop above only ends normally when the input is exhausted
    return [];

    spaces:
    # If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
    # or 0x20 (SP) then advance position to the next byte, then, repeat this step.
    while (in_array($s[$pos] ?? "", $whitespace, true)) {
        $pos++;
    }
    $char = $s[$pos] ?? "";
    if ($char === "") {
        // Out of bytes
        return [];
    }
    # If the byte at position is not 0x3D (=), abort the get an attribute algorithm.
    # The attribute's name is the value of attribute name, its value is the empty string.
    if ($char !== "=") {
        return ['name' => $name, 'value' => $value];
    }
    # Advance position past the 0x3D (=) byte.
    $pos++;

    value:
    # If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
    # or 0x20 (SP) then advance position to the next byte, then, repeat this step.
    while (in_array($s[$pos] ?? "", $whitespace, true)) {
        $pos++;
    }
    $char = $s[$pos] ?? "";
    if ($char === "") {
        // Out of bytes
        return [];
    }
    # Process the byte at position as follows:
    # If it is 0x22 (") or 0x27 (')
    if ($char === "'" || $char === '"') {
        # Let b be the value of the byte at position.
        $b = $char;
        # Quote loop: Advance position to the next byte.
        for ($pos++; ($char = $s[$pos] ?? "") !== ""; $pos++) {
            # If the value of the byte at position is the value of b,
            # then advance position to the next byte and abort
            # the "get an attribute" algorithm.
            # The attribute's name is the value of attribute name,
            # and its value is the value of attribute value.
            if ($char === $b) {
                $pos++;
                return ['name' => $name, 'value' => $value];
            }
            # Otherwise, append a code point to attribute value whose
            # value is the same as the value of the byte at position.
            // OPTIMIZATION: Also handle uppercase characters
            $value .= strtolower($char);
        }
        // Out of bytes
        return [];
    }
    # If it is 0x3E (>)
    elseif ($char === ">") {
        # Abort the get an attribute algorithm.
        # The attribute's name is the value of attribute name,
        # its value is the empty string.
        return ['name' => $name, 'value' => $value];
    }
    # Anything else
    else {
        # Append a code point with the same value as the byte at position to attribute value.
        # Advance position to the next byte.
        // OPTIMIZATION: Also handle uppercase characters
        $value .= strtolower($char);
        for ($pos++; ($char = $s[$pos] ?? "") !== ""; $pos++) {
            # Process the byte at position as follows:
            # If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>)
            if (in_array($char, ["\x09", "\x0A", "\x0C", "\x0D", " ", ">"], true)) {
                # Abort the get an attribute algorithm.
                # The attribute's name is the value of attribute name
                # and its value is the value of attribute value.
                return ['name' => $name, 'value' => $value];
            }
            # If it is in the range 0x41 (A) to 0x5A (Z)
            # Anything else
            # Append a code point with the same value as
            # the byte at position to attribute value.
            $value .= strtolower($char);
        }
        // Out of bytes
        return [];
    }
}
/**
 * Interprets a quasi-Content-Type value during the encoding detection pre-scan.
 *
 * Looks for the word "charset" followed by "=" and a (possibly quoted)
 * label, and passes that label to self::fromCharset().
 *
 * The function always returns explicitly: when the scan position reaches
 * the end of the string without a usable "charset=" (for instance an empty
 * string, or a string ending in "charset"), null is returned instead of
 * falling off the end of the function, which is a TypeError for a function
 * declared to return ?string.
 *
 * @param string $s The string to examine
 * @return string|null Whatever self::fromCharset() returns for the label found, or null if no label could be extracted
 */
protected static function fromMeta(string $s): ?string {
    $whitespace = ["\x09", "\x0A", "\x0C", "\x0D", " "];
    # The algorithm for extracting a character encoding from a meta element,
    # given a string s, is as follows.
    # It either returns a character encoding or nothing.
    # Let position be a pointer into s, initially pointing at the start of the string.
    $pos = 0;
    $end = strlen($s);
    # Loop:
    while ($pos < $end) {
        # Find the first seven characters in s after position
        # that are an ASCII case-insensitive match for the word "charset".
        # If no such match is found, return nothing.
        $found = stripos($s, "charset", $pos);
        if ($found === false) {
            return null;
        }
        $pos = $found + 7;
        # Skip any ASCII whitespace that immediately follow the word "charset"
        # (there might not be any).
        while (in_array($s[$pos] ?? "", $whitespace, true)) {
            $pos++;
        }
        # If the next character is not a U+003D EQUALS SIGN (=),
        # then move position to point just before that next
        # character, and jump back to the step labeled loop.
        if (($s[$pos] ?? "") !== "=") {
            continue;
        }
        # Skip any ASCII whitespace that immediately follow the equals sign
        # (there might not be any).
        do {
            $pos++;
        } while (in_array($s[$pos] ?? "", $whitespace, true));
        # Process the next character as follows:
        $char = $s[$pos] ?? "";
        # If it is a U+0022 QUOTATION MARK character (")...
        # If it is a U+0027 APOSTROPHE character (')...
        if ($char === '"' || $char === "'") {
            # ... and there is a later U+0022 QUOTATION MARK character (") in s
            # ... and there is a later U+0027 APOSTROPHE character (') in s
            // A separate variable is used so the loop bound is not overwritten
            $close = strpos($s, $char, $pos + 1);
            if ($close === false) {
                # If it is an unmatched U+0022 QUOTATION MARK character (")
                # If it is an unmatched U+0027 APOSTROPHE character (')
                # Return nothing
                return null;
            }
            return self::fromCharset(substr($s, $pos + 1, $close - $pos - 1));
        }
        # There is no next character
        elseif ($char === "") {
            # Return nothing
            return null;
        }
        # Anything else
        else {
            # Return the result of getting an encoding from the substring
            # that consists of this character up to but not including
            # the first ASCII whitespace or U+003B SEMICOLON (;)
            # character, or the end of s, whichever comes first.
            $size = strcspn($s, "\x09\x0A\x0C\x0D ;", $pos);
            return self::fromCharset(substr($s, $pos, $size));
        }
    }
    // The end of the string was reached without finding "charset" followed by "="
    return null;
}
}

2
lib/DOM/Comment.php → lib/Comment.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class Comment extends \DOMComment {
use LeafNode, Moonwalk, ToString;

2
lib/DOM/DOMException.php → lib/DOMException.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class DOMException extends \Exception {
// From PHP's DOMException; keeping error codes consistent

289
lib/Data.php

@ -1,289 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
use MensBeam\Intl\Encoding;
use MensBeam\Intl\Encoding\Encoding as EncodingEncoding;
/**
 * Decoded character stream with newline normalization and position tracking.
 *
 * The constructor picks an encoding for the supplied bytes and wraps them in
 * a decoder created by Encoding::createDecoder(). The remaining methods read
 * characters from that decoder, normalize CR and CR+LF to LF, and (when
 * tracking is enabled) keep line/column bookkeeping and report erroneous
 * characters through $this->error().
 *
 * NOTE(review): $this->errorHandler and $this->error() are not declared in
 * this class; presumably they come from the ParseErrorEmitter trait.
 */
class Data {
    use ParseErrorEmitter;

    // Used to get the file path for error reporting.
    public $filePath;
    // Whether the encoding is certain or tentative; this is a feature of the specification, but not relevant for this implementation
    public $encodingCertain = false;
    // The canonical name of the encoding
    public $encoding;
    // Internal storage for the Intl data object.
    protected $data;
    // Used for error reporting to display line number.
    protected $_line = 1;
    // Used for error reporting to display column number.
    protected $_column = 0;
    // array of normalized CR+LF pairs, denoted by the character offset of the LF
    protected $normalized = [];
    // Holds the character position and column number of each newline
    protected $newlines = [];
    // Holds the character position of each supplementary plane character, which count as two columns when reporting errors
    protected $astrals = [];
    // The character position of the forward-most input stream error emitted
    protected $lastError = 0;
    // Whether the EOF imaginary character has been consumed
    protected $eof = false;
    // Whether to track positions for reporting parse errors
    protected $track = true;

    const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    const DIGIT = '0123456789';
    const HEX = '0123456789ABCDEFabcdef';
    const WHITESPACE = "\t\n\x0C\x0D ";
    const WHITESPACE_REGEX = '/[\t\n\x0c\x0D ]+/';
    const WHITESPACE_SAFE = "\t\x0C ";

    /**
     * Determines the encoding of the input and prepares it for reading.
     *
     * @param string $data The raw input bytes
     * @param string $filePath Stored for use in error reporting
     * @param ParseError|null $errorHandler The error handler to use; a new ParseError is created when null
     * @param string|null $encodingOrContentType An encoding label or Content-Type value used as an encoding hint; null is treated as the empty string
     */
    public function __construct(string $data, string $filePath = 'STDIN', ?ParseError $errorHandler = null, ?string $encodingOrContentType = '') {
        $this->errorHandler = $errorHandler ?? new ParseError;
        $this->filePath = $filePath;
        $encodingOrContentType = (string) $encodingOrContentType;
        // don't track the current line/column position if error reporting has been suppressed
        $this->track = (bool) (error_reporting() & \E_USER_WARNING);

        # 13.2.3.2 Determining the character encoding
        # User agents must use the following algorithm, called the encoding
        # sniffing algorithm, to determine the character encoding to use
        # when decoding a document in the first pass. This algorithm takes
        # as input any out-of-band metadata available to the user agent
        # (e.g. the Content-Type metadata of the document) and all the bytes
        # available so far, and returns a character encoding and a confidence
        # that is either tentative or certain.
        // NOTE: We implement steps 1, 2, 4, 5, and 9
        if ($encoding = Charset::fromBOM($data)) {
            # If the result of BOM sniffing is an encoding, return that
            # encoding with confidence certain.
            $this->encodingCertain = true;
        } elseif ($encoding = Charset::fromCharset($encodingOrContentType)) {
            # If the user has explicitly instructed the user agent to override
            # the document's character encoding with a specific encoding,
            # optionally return that encoding with the confidence certain.
            $this->encodingCertain = true;
        } elseif ($encoding = Charset::fromTransport($encodingOrContentType)) {
            # If the transport layer specifies a character encoding, and it is
            # supported, return that encoding with the confidence certain.
            $this->encodingCertain = true;
        } elseif ($encoding = Charset::fromPrescan($data)) {
            # Optionally prescan the byte stream to determine its encoding.
            # The aforementioned algorithm either aborts unsuccessfully or
            # returns a character encoding. If it returns a character
            # encoding, then return the same encoding, with confidence
            # tentative.
            $this->encodingCertain = false;
        } else {
            # Otherwise, return an implementation-defined or user-specified
            # default character encoding, with the confidence tentative.
            $encoding = Charset::fromCharset(Parser::$fallbackEncoding) ?? "windows-1252";
            $this->encodingCertain = false;
        }
        $this->encoding = $encoding;
        $this->data = Encoding::createDecoder($encoding, $data, false, true);
    }

    /**
     * Reads the next character, normalizing CR and CR+LF to LF.
     *
     * When tracking is enabled this also updates the line and column
     * counters, notes that the end of input has been reached when the
     * decoder yields the empty string, and reports control characters,
     * surrogates and noncharacters found in the input.
     *
     * @return string The character read; the empty string at end of input
     */
    public function consume(): string {
        $char = $this->data->nextChar();

        # Before the tokenization stage, the input stream must be
        # preprocessed by normalizing newlines.
        # Thus, newlines in HTML DOMs are represented by U+000A LF characters,
        # and there are never any U+000D CR characters in the input to the tokenization stage.
        if ($char === "\r") {
            // if this is a CR+LF pair, skip the CR and note the normalization
            if ($this->data->peekChar() === "\n") {
                $char = $this->data->nextChar();
                $this->normalized[$this->data->posChar()] = true;
            }
            // otherwise just silently change the character to LF;
            // the bare CR will be trivial to process when seeking backwards
            else {
                $char = "\n";
            }
        }

        // unless we're peeking, track line and column position, and whether we've hit EOF
        if ($this->track) {
            if ($char === "\n") {
                $this->newlines[$this->data->posChar()] = $this->_column;
                $this->_column = 0;
                $this->_line++;
            } elseif ($char === '') {
                $this->eof = true;
            } else {
                $this->_column++;
                $len = strlen($char);
                $here = $this->data->posChar();
                if ($this->lastError < $here) {
                    // look for erroneous characters
                    if ($len === 1) {
                        $ord = ord($char);
                        if (($ord < 0x20 && !in_array($ord, [0x0, 0x9, 0xA, 0xC])) || $ord === 0x7F) {
                            $this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM);
                            $this->lastError = $here;
                        }
                    } elseif ($len === 2) {
                        if (ord($char[0]) == 0xC2) {
                            $ord = ord($char[1]);
                            if ($ord >= 0x80 && $ord <= 0x9F) {
                                $this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM);
                                $this->lastError = $here;
                            }
                        }
                    } elseif ($len === 3) {
                        $head = ord($char[0]);
                        if ($head === 0xED) {
                            $tail = (ord($char[1]) << 8) + ord($char[2]);
                            if ($tail >= 0xA080 && $tail <= 0xBFBF) {
                                $this->error(ParseError::SURROGATE_IN_INPUT_STREAM);
                                $this->lastError = $here;
                            }
                        } elseif ($head === 0xEF) {
                            $tail = (ord($char[1]) << 8) + ord($char[2]);
                            if (($tail >= 0xB790 && $tail <= 0xB7AF) || $tail >= 0xBFBE) {
                                $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM);
                                $this->lastError = $here;
                            } elseif ($tail === 0xBFBD && $this->data->posErr === $here) {
                                $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM, $this->data->posByte);
                                $this->lastError = $here;
                            }
                        }
                    } elseif ($len === 4) {
                        // A supplementary-plane noncharacter ends in FFFE or FFFF.
                        // In UTF-8 those sixteen bits are spread over the low four
                        // bits of the second byte and the payload of the last two
                        // bytes, so the second byte must be checked as well;
                        // otherwise code points such as U+10FFE would be reported.
                        $tail = (ord($char[2]) << 8) + ord($char[3]);
                        if ((ord($char[1]) & 0x0F) === 0x0F && $tail >= 0xBFBE) {
                            $this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM);
                            $this->lastError = $here;
                        }
                        $this->astrals[$here] = true;
                    }
                }
            }
        }

        return $char;
    }

    /**
     * Steps the stream back by the given number of characters.
     *
     * If the end of input had been consumed, the first step only clears the
     * end-of-input flag. A normalized CR+LF pair is stepped over as a unit.
     *
     * @param int $length The number of characters to step back; must be positive
     * @param bool $retreatPointer Whether to also recalculate the line and column counters (only done when tracking is enabled)
     */
    public function unconsume(int $length = 1, bool $retreatPointer = true): void {
        assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));

        if ($this->eof) {
            $length--;
            $this->eof = false;
        }

        while ($length-- > 0) {
            $here = $this->data->posChar();
            // if the previous character was a normalized CR+LF pair, we need to go back two
            if (isset($this->normalized[$here])) {
                $this->data->seek(-1);
            }

            // recalculate line and column positions, if requested
            if ($retreatPointer && $this->track) {
                // A newline is identified by the presence of its entry rather
                // than by a non-zero column: the newline ending an empty line
                // is recorded with column 0 and must still move back a line.
                if (isset($this->newlines[$here])) {
                    $this->_column = $this->newlines[$here];
                    $this->_line--;
                } else {
                    $this->_column--;
                    if ($this->astrals[$here] ?? false) {
                        $this->_column--;
                    }
                }
            }

            $this->data->seek(-1);
        }
    }

    /**
     * Consumes a run of characters which all appear in $match.
     *
     * @param string $match The set of characters to accept
     * @param int|null $limit Passed through to the decoder's asciiSpan()
     * @return string The characters consumed
     */
    public function consumeWhile(string $match, ?int $limit = null): string {
        $start = $this->data->posChar();
        $out = $this->data->asciiSpan($match, $limit);
        if ($this->track) {
            $this->_column += ($this->data->posChar() - $start);
        }
        return $out;
    }

    /**
     * Consumes a run of characters, none of which appears in $match.
     *
     * CR and LF always end the run. When tracking is enabled, control
     * characters end it as well.
     *
     * @param string $match The set of characters which end the run
     * @param int|null $limit Passed through to the decoder's asciiSpanNot()
     * @return string The characters consumed
     */
    public function consumeUntil(string $match, ?int $limit = null): string {
        $start = $this->data->posChar();
        if ($this->track) {
            // control characters produce parse errors
            $match .= "\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F";
            $out = $this->data->asciiSpanNot($match."\r\n", $limit);
            $this->_column += ($this->data->posChar() - $start);
            return $out;
        } else {
            return $this->data->asciiSpanNot($match."\r\n", $limit);
        }
    }

    /**
     * Returns upcoming characters without consuming them.
     *
     * @param int $length The number of characters to return; must be positive
     */
    public function peek(int $length = 1): string {
        assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
        return $this->data->peekChar($length);
    }

    /**
     * Returns an indexed array with the line and column positions of the requested offset from the current position
     *
     * This is a pure query: it never modifies the tracked line or column.
     * [0, 0] is returned when tracking is disabled.
     *
     * @param int $relativePos The offset from the current position; negative values look backwards
     * @return array A two-member array of line and column
     */
    public function whereIs(int $relativePos): array {
        if ($this->track) {
            if ($this->eof) {
                $relativePos++;
                if ($this->astrals[$this->data->posChar()] ?? false) {
                    $relativePos++;
                }
            }
            if ($relativePos === 0) {
                if (!$this->_column && $this->_line > 1) {
                    return [$this->_line - 1, $this->newlines[$this->data->posChar()] + 1];
                } else {
                    return [$this->_line, $this->_column];
                }
            } elseif ($relativePos < 0) {
                $pos = $this->data->posChar();
                $line = $this->_line;
                $col = $this->_column;
                do {
                    // If the current position is the start of a line,
                    // get the column position of the end of the previous line
                    if (isset($this->newlines[$pos])) {
                        $line--;
                        $col = $this->newlines[$pos];
                        // If the newline was a normalized CR+LF pair,
                        // go back one extra character
                        if (isset($this->normalized[$pos])) {
                            $pos--;
                        }
                    } else {
                        $col--;
                        // supplementary plane characters count as two;
                        // only the local copy is adjusted so that the
                        // stream's own column counter is left untouched
                        if ($this->astrals[$pos] ?? false) {
                            $col--;
                        }
                    }
                    $pos--;
                } while (++$relativePos < 0);
                return [$line, $col];
            } else {
                return [$this->_line, $this->_column + $relativePos];
            }
        } else {
            return [0, 0];
        }
    }

    /**
     * Provides read-only access to the "column", "line" and "pointer"
     * pseudo-properties; any other name yields null.
     */
    public function __get($property) {
        switch ($property) {
            case 'column':
                return $this->_column;
            case 'line':
                return $this->_line;
            case 'pointer':
                return $this->data->posChar();
            default:
                return null;
        }
    }
}

0
lib/DOM/Document.php → lib/Document.php

2
lib/DOM/DocumentFragment.php → lib/DocumentFragment.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class DocumentFragment extends \DOMDocumentFragment {
use ContainerNode, MoonwalkShallow, ParentNode, ToString, Walk, WalkShallow;

4
lib/DOM/Element.php → lib/Element.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class Element extends \DOMElement {
use ContainerNode, DocumentOrElement, EscapeString, MagicProperties, Moonwalk, MoonwalkShallow, ParentNode, ToString, Walk, WalkShallow;
@ -13,7 +13,7 @@ class Element extends \DOMElement {
public function __get_classList(): ?TokenList {
// MensBeam\HTML\TokenList uses WeakReference to prevent a circular reference,
// MensBeam\HTML\DOM\TokenList uses WeakReference to prevent a circular reference,
// so it requires PHP 7.4 to work.
if (version_compare(\PHP_VERSION, '7.4.0', '>=')) {
// Only create the class list if it is actually used.

2
lib/DOM/ElementMap.php → lib/ElementMap.php

@ -5,7 +5,7 @@
*/
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// This is a write-only map of elements which need to be kept in memory; it
// exists because values of properties on derived DOM classes are lost unless at

109
lib/Exception.php

@ -1,109 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Exception whose message is selected by numeric code.
 *
 * Each code maps to a message template in self::$messages; "%s"
 * placeholders in the template are filled from the extra constructor
 * arguments.
 */
class Exception extends \Exception {
    const INVALID_CODE = 100;
    const UNKNOWN_ERROR = 101;
    const INCORRECT_PARAMETERS_FOR_MESSAGE = 102;
    const UNREACHABLE_CODE = 103;

    const PARSER_NONEMPTY_DOCUMENT = 201;
    const INVALID_QUIRKS_MODE = 202;

    const STACK_INVALID_INDEX = 301;
    const STACK_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 302;
    const STACK_ELEMENT_STRING_ARRAY_EXPECTED = 303;
    const STACK_STRING_ARRAY_EXPECTED = 304;
    const STACK_INCORRECTLY_EMPTY = 305;
    const STACK_INVALID_STATE = 306;
    const STACK_NO_CONTEXT_EXISTS = 307;
    const STACK_INVALID_VALUE = 308;
    const STACK_INVALID_OFFSET = 309;
    const STACK_ROOT_ELEMENT_DELETE = 310;

    const DATA_NODATA = 401;
    const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 402;

    const TOKENIZER_INVALID_STATE = 501;
    const TOKENIZER_INVALID_CHARACTER_REFERENCE_STATE = 502;

    const TREEBUILDER_FORMELEMENT_EXPECTED = 601;
    const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 602;
    const TREEBUILDER_UNEXPECTED_END_OF_FILE = 603;
    const TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT = 604;
    const TREEBUILDER_INVALID_TOKEN_CLASS = 605;
    const TREEBUILDER_INVALID_INSERTION_LOCATION = 606;

    /** Message templates, keyed by error code */
    protected static $messages = [
        100 => 'Invalid error code',
        101 => 'Unknown error; escaping',
        102 => 'Incorrect number of parameters for Exception message; %s expected',
        103 => 'Unreachable code',

        201 => 'Non-empty Document supplied as argument for Parser',
        202 => 'Fragment\'s quirks mode must be one of Parser::NO_QUIRKS_MODE, Parser::LIMITED_QUIRKS_MODE, or Parser::QUIRKS_MODE',

        301 => 'Invalid Stack index at %s',
        302 => 'Element, Document, or DOMDocumentFragment expected for fragment context',
        303 => 'Element, string, or array expected',
        304 => 'String or array expected',
        305 => 'Stack is incorrectly empty',
        306 => 'Stack is in an invalid state; dump: %s',
        307 => 'No %s context exists in stack',
        308 => 'Stack value is invalid',
        309 => 'Invalid stack offset; offset must be %s',
        310 => 'Root element cannot be deleted from the stack',

        401 => 'Data string expected; found %s',
        402 => '%s is an invalid data consumption length; a value of 1 or above is expected',

        501 => 'The Tokenizer has entered an invalid state: %s',
        502 => 'Invalid character reference consumption state: %s',

        601 => 'Form element expected, found %s',
        602 => 'Element, Document, or DOMDocumentFragment expected; found %s',
        603 => 'Unexpected end of file',
        604 => 'Target document is not empty',
        605 => 'Invalid token class: %s',
        606 => 'Invalid insertion location'
    ];

    /**
     * @param int $code One of the class constants; an unknown code raises an INVALID_CODE exception instead
     * @param mixed ...$args Values for the placeholders of the message. A
     *  Throwable given as the first or the last argument is taken out of
     *  the list and used as the previous exception. Supplying a number of
     *  remaining values different from the number of distinct placeholders
     *  raises an INCORRECT_PARAMETERS_FOR_MESSAGE exception
     */
    public function __construct(int $code, ...$args) {
        if (!isset(self::$messages[$code])) {
            throw new self(self::INVALID_CODE);
        }
        $template = self::$messages[$code];

        // A previous exception may lead or trail the placeholder values
        $cause = null;
        if ($args) {
            if ($args[0] instanceof \Throwable) {
                $cause = array_shift($args);
            } elseif (end($args) instanceof \Throwable) {
                $cause = array_pop($args);
            }
        }

        // Work out how many distinct placeholders the template holds
        preg_match_all('/(\%(?:\d+\$)?s)/', $template, $placeholders);
        $expected = count(array_unique($placeholders[1]));
        if (count($args) !== $expected) {
            throw new self(self::INCORRECT_PARAMETERS_FOR_MESSAGE, $expected);
        }

        // Templates without placeholders are used verbatim
        $text = ($expected > 0) ? sprintf($template, ...$args) : $template;
        parent::__construct($text, $code, $cause);
    }
}

10
lib/LoopException.php

@ -1,10 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Marker exception type: it adds no state or behaviour to \Exception.
 *
 * NOTE(review): presumably thrown to break out of a processing loop and
 * caught by the code driving that loop; confirm against the throw sites.
 */
class LoopException extends \Exception {
}

57
lib/NameCoercion.php

@ -1,57 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
use MensBeam\Intl\Encoding\UTF8;
/**
 * Helpers for making arbitrary names acceptable as XML names by replacing
 * disallowed characters with "U" followed by six uppercase hexadecimal
 * digits, for reversing that replacement, and for escaping strings.
 */
trait NameCoercion {
    /**
     * Replaces every character of $name which may not appear in an XML name
     * (or, for the first character, may not start one) with an escape of
     * the form "UXXXXXX", where XXXXXX is the character's code point.
     */
    protected function coerceName(string $name): string {
        // Formats the escape sequence for a single character
        $toEscape = static function(string $char): string {
            $codePoint = (new UTF8($char))->nextCode();
            return "U".str_pad(strtoupper(dechex($codePoint)), 6, "0", \STR_PAD_LEFT);
        };

        // This matches the inverse of the production of NameChar in XML 1.0,
        // with the added exclusion of ":" from allowed characters
        // See https://www.w3.org/TR/REC-xml/#NT-NameStartChar
        preg_match_all('/[^\-\.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $name, $invalid);
        foreach (array_unique($invalid[0], \SORT_STRING) as $char) {
            $name = str_replace($char, $toEscape($char), $name);
        }

        // Apply stricter rules to the first character
        if (preg_match('/^[^A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $name, $first)) {
            $lead = $first[0];
            $name = $toEscape($lead).substr($name, strlen($lead));
        }
        return $name;
    }

    /**
     * Replaces every "UXXXXXX" sequence (six uppercase hexadecimal digits)
     * in $name with the character having that code point.
     */
    protected function uncoerceName(string $name): string {
        preg_match_all('/U[0-9A-F]{6}/', $name, $escapes);
        foreach (array_unique($escapes[0], \SORT_STRING) as $escape) {
            $char = UTF8::encode(hexdec(substr($escape, 1)));
            $name = str_replace($escape, $char, $name);
        }
        return $name;
    }

    /**
     * Escapes a string for serialization.
     *
     * @param string $string The string to escape
     * @param bool $attribute Whether to escape for attribute mode (quotation marks) instead of text mode (angle brackets)
     */
    protected function escapeString(string $string, bool $attribute = false): string {
        # Escaping a string (for the purposes of the algorithm above) consists of
        # running the following steps:

        # 1. Replace any occurrence of the "&" character by the string "&amp;".
        # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
        #    string "&nbsp;".
        $escaped = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string);

        # 3. If the algorithm was invoked in the attribute mode, replace any
        #    occurrences of the """ character by the string "&quot;".
        if ($attribute) {
            return str_replace('"', '&quot;', $escaped);
        }
        # 4. If the algorithm was not invoked in the attribute mode, replace any
        #    occurrences of the "<" character by the string "&lt;", and any
        #    occurrences of the ">" character by the string "&gt;".
        return str_replace(['<', '>'], ['&lt;', '&gt;'], $escaped);
    }
}

10
lib/NotImplementedException.php

@ -1,10 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Marker exception type: it adds no state or behaviour to \Exception.
 *
 * NOTE(review): the name suggests it signals functionality which has not
 * been implemented; confirm against the throw sites.
 */
class NotImplementedException extends \Exception {
}

372
lib/OpenElementsStack.php

@ -1,372 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
class OpenElementsStack extends Stack {
// Element names which generateImpliedEndTags() pops from the stack;
// the names are the keys so that membership can be tested by lookup
protected const IMPLIED_END_TAGS = [
'dd' => true,
'dt' => true,
'li' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
];
// Element names which generateImpliedEndTagsThoroughly() pops from the stack
protected const IMPLIED_END_TAGS_THOROUGH = [
'caption' => true,
'colgroup' => true,
'dd' => true,
'dt' => true,
'li' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
'tbody' => true,
'td' => true,
'tfoot' => true,
'th' => true,
'thead' => true,
'tr' => true,
];
// Element names, grouped by namespace, passed to hasElementInScopeHandler()
// by hasElementInScope(); also the basis of the list item and button scopes
protected const GENERAL_SCOPE = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
],
];
// Merged into the HTML-namespace entries of GENERAL_SCOPE by hasElementInListItemScope()
protected const LIST_ITEM_SCOPE = [
// everything in general scope, and these in the HTML namespace
'ol',
'ul',
];
// Merged into the HTML-namespace entries of GENERAL_SCOPE by hasElementInButtonScope()
protected const BUTTON_SCOPE = [
// everything in general scope, and these in the HTML namespace
'button',
];
// Element names passed to hasElementInScopeHandler() by hasElementInTableScope()
protected const TABLE_SCOPE = [
Parser::HTML_NAMESPACE => [
'html',
'table',
'template',
],
];
// Element names passed to hasElementInScopeHandler() by hasElementInSelectScope(),
// which also passes false as the handler's third argument
protected const SELECT_SCOPE = [
// all elements EXCEPT these
Parser::HTML_NAMESPACE => [
'optgroup',
'option',
],
];
// The fragment context element supplied to the constructor, if any
/** @var ?\DOMElement */
protected $fragmentContext = null;
// NOTE(review): the six public properties below are not assigned anywhere in
// the visible code; presumably computeProperties() maintains them
/** @var ?\DOMElement */
public $currentNode = null;
/** @var ?string */
public $currentNodeName = null;
/** @var ?string */
public $currentNodeNamespace = null;
/** @var ?\DOMElement */
public $adjustedCurrentNode = null;
/** @var ?string */
public $adjustedCurrentNodeName = null;
/** @var ?string */
public $adjustedCurrentNodeNamespace = null;
/**
 * Creates the stack, remembering the fragment context element.
 *
 * @param \DOMElement|null $fragmentContext The fragment context element, if any; stored as given
 */
public function __construct(?\DOMElement $fragmentContext = null) {
$this->fragmentContext = $fragmentContext;
}
/**
 * Removes the last entry from the stack's storage and returns it, after
 * recomputing the stack's derived properties.
 */
public function pop() {
    $removed = array_pop($this->_storage);
    $this->computeProperties();
    return $removed;
}
/**
 * Stores a value at the given offset, or appends it when the offset is
 * null, then recomputes the stack's derived properties.
 */
public function offsetSet($offset, $value) {
    assert($offset >= 0, new Exception(Exception::STACK_INVALID_INDEX, $offset));

    if ($offset !== null) {
        $this->_storage[$offset] = $value;
    } else {
        // A null offset means "append"
        $this->_storage[] = $value;
    }

    $this->computeProperties();
}
/**
 * Removes the entry at the given offset, closing the gap it leaves, then
 * recomputes the stack's derived properties.
 */
public function offsetUnset($offset) {
    assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset));

    // Splicing out one entry (with no replacement) reindexes the entries after it
    array_splice($this->_storage, $offset, 1);
    $this->computeProperties();
}
/**
 * Inserts an element at the given position, or appends it when no
 * position is given, then recomputes the stack's derived properties.
 *
 * @param \DOMElement $element The element to insert
 * @param int|null $at The position to insert at; null appends
 */
public function insert(\DOMElement $element, ?int $at = null): void {
    assert($at === null || ($at >= 0 && $at <= count($this->_storage)), new Exception(Exception::STACK_INVALID_INDEX, $at));

    if ($at !== null) {
        // Entries at and after the position are shifted up by one
        array_splice($this->_storage, $at, 0, [$element]);
    } else {
        $this[] = $element; // @codeCoverageIgnore
    }

    $this->computeProperties();
}
/**
 * Pops entries until one has been popped which has a null namespace URI
 * and whose node name is among the given names, then recomputes the
 * stack's derived properties.
 *
 * @param string ...$target The node names to stop at
 */
public function popUntil(string ...$target): void {
    while (true) {
        $popped = array_pop($this->_storage);
        assert(isset($popped), new Exception(Exception::STACK_INCORRECTLY_EMPTY));
        if ($popped->namespaceURI === null && in_array($popped->nodeName, $target)) {
            break;
        }
    }
    $this->computeProperties();
}
/**
 * Pops entries until the given element itself has been popped, then
 * recomputes the stack's derived properties.
 *
 * @param \DOMElement $target The element to stop at
 */
public function popUntilSame(\DOMElement $target): void {
    while (true) {
        $popped = array_pop($this->_storage);
        if ($popped->isSameNode($target)) {
            break;
        }
    }
    $this->computeProperties();
}
/**
 * Returns the key of the first entry, in the order the stack's own
 * iteration yields entries, which has a null namespace URI and whose node
 * name is among the given names.
 *
 * @param string ...$name The node names to look for
 * @return int The key of the matching entry, or -1 if there is none
 */
public function find(string ...$name): int {
    foreach ($this as $key => $entry) {
        if ($entry->namespaceURI !== null) {
            continue;
        }
        if (in_array($entry->nodeName, $name)) {
            return $key;
        }
    }
    return -1;
}
/**
 * Returns the key of the first entry, in the order the stack's own
 * iteration yields entries, which either has a non-null namespace URI or
 * has a node name that is not among the given names.
 *
 * @param string ...$name The node names to skip over
 * @return int The key of the matching entry, or -1 if there is none
 */
public function findNot(string ...$name): int {
    foreach ($this as $key => $entry) {
        $skipped = ($entry->namespaceURI === null && in_array($entry->nodeName, $name));
        if (!$skipped) {
            return $key;
        }
    }
    return -1;
}
/**
 * Searches the storage from its last entry to its first for the given
 * element itself.
 *
 * @param \DOMElement $target The element to look for
 * @return int The index of the element, or -1 if it is not present
 */
public function findSame(\DOMElement $target): int {
    $index = count($this->_storage) - 1;
    while ($index > -1) {
        if ($this->_storage[$index]->isSameNode($target)) {
            return $index;
        }
        $index--;
    }
    return -1;
}
/**
 * Removes the given element from the stack if it is present.
 *
 * @param \DOMElement $target The element to remove
 */
public function removeSame(\DOMElement $target): void {
    $index = $this->findSame($target);
    if ($index <= -1) {
        // Not on the stack: nothing to do
        return;
    }
    unset($this[$index]);
}
/**
 * Pops entries while the top entry has a null namespace URI and a node
 * name listed in IMPLIED_END_TAGS, other than the excluded names; then
 * recomputes the stack's derived properties.
 *
 * NOTE(review): $this->count is decremented here but not by the other
 * popping methods visible in this class; confirm how the parent class
 * uses that property.
 *
 * @param string ...$exclude Node names to treat as if they were not in the list
 */
public function generateImpliedEndTags(string ...$exclude): void {
    # When the steps below require the UA to generate implied end tags,
    # then, while the current node is {elided list of element names},
    # the UA must pop the current node off the stack of open elements.
    #
    # If a step requires the UA to generate implied end tags but lists
    # an element to exclude from the process, then the UA must perform
    # the above steps as if that element was not in the above list.
    $implied = self::IMPLIED_END_TAGS;
    foreach ($exclude as $excluded) {
        $implied[$excluded] = false;
    }

    while (!$this->isEmpty()) {
        if ($this->top()->namespaceURI !== null) {
            break;
        }
        if (!($implied[$this->top()->nodeName] ?? false)) {
            break;
        }
        array_pop($this->_storage);
        $this->count--;
    }

    $this->computeProperties();
}
/**
 * Pops entries while the top entry has a null namespace URI and a node
 * name listed in IMPLIED_END_TAGS_THOROUGH; then recomputes the stack's
 * derived properties.
 *
 * NOTE(review): $this->count is decremented here but not by the other
 * popping methods visible in this class; confirm how the parent class
 * uses that property.
 */
public function generateImpliedEndTagsThoroughly(): void {
    # When the steps below require the UA to generate all implied end tags
    # thoroughly, then, while the current node is {elided list of element names},
    # the UA must pop the current node off the stack of open elements.
    while (!$this->isEmpty()) {
        if ($this->top()->namespaceURI !== null) {
            break;
        }
        if (!(self::IMPLIED_END_TAGS_THOROUGH[$this->top()->nodeName] ?? false)) {
            break;
        }
        array_pop($this->_storage);
        $this->count--;
    }

    $this->computeProperties();
}
/**
 * Pops every entry stored after the one which find() reports for a
 * "table", "template" or "html" element, then recomputes the stack's
 * derived properties.
 */
public function clearToTableContext(): void {
    # When the algorithm requires the UA to clear the stack back to a
    # table context, it means that the UA must, while the current node
    # is not a table, template, or html element, pop elements from the
    # stack of open elements.
    assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY));
    $context = $this->find("table", "template", "html");
    assert($context > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table'));

    // Everything stored after the context element is surplus
    for ($surplus = count($this->_storage) - ($context + 1); $surplus > 0; $surplus--) {
        array_pop($this->_storage);
    }

    $this->computeProperties();
}
/**
 * Pops every entry stored after the one which find() reports for a
 * "tbody", "tfoot", "thead", "template" or "html" element, then recomputes
 * the stack's derived properties.
 */
public function clearToTableBodyContext(): void {
    # When the steps above require the UA to clear the stack back to a
    # table body context, it means that the UA must, while the current
    # node is not a tbody, tfoot, thead, template, or html element,
    # pop elements from the stack of open elements.
    assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY));
    $context = $this->find("tbody", "tfoot", "thead", "template", "html");
    assert($context > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table body'));

    // Everything stored after the context element is surplus
    for ($surplus = count($this->_storage) - ($context + 1); $surplus > 0; $surplus--) {
        array_pop($this->_storage);
    }

    $this->computeProperties();
}
/**
 * Clears the stack back to a table row context: every entry above the
 * position reported by find() for "tr", "template" or "html" is popped, then
 * the cached stack properties are recomputed.
 */
public function clearToTableRowContext(): void {
    # When the steps above require the UA to clear the stack back to a
    # table row context, it means that the UA must, while the current
    # node is not a tr, template, or html element, pop elements from
    # the stack of open elements.
    assert(count($this->_storage) > 0, new Exception(Exception::STACK_INCORRECTLY_EMPTY));
    $contextIndex = $this->find("tr", "template", "html");
    assert($contextIndex > -1, new Exception(Exception::STACK_NO_CONTEXT_EXISTS, 'table row'));
    // Number of entries sitting above the context element
    $excess = count($this->_storage) - ($contextIndex + 1);
    for (; $excess > 0; $excess--) {
        array_pop($this->_storage);
    }
    $this->computeProperties();
}
/**
 * Reports whether any of the given targets is in scope, using
 * GENERAL_SCOPE as the list of scope-terminating element types.
 *
 * @param \DOMElement|string ...$target Elements (matched by identity) or element names
 */
public function hasElementInScope(...$target): bool {
    # The stack of open elements is said to have a particular element in scope when
    # it has that element in the specific scope consisting of the following element
    # types:
    #
    # {elided}
    $boundary = self::GENERAL_SCOPE;
    return $this->hasElementInScopeHandler($target, $boundary);
}
/**
 * Reports whether any of the given targets is in list item scope: the
 * general scope with LIST_ITEM_SCOPE merged into its HTML-namespace entry.
 *
 * @param \DOMElement|string ...$target Elements (matched by identity) or element names
 */
public function hasElementInListItemScope(...$target): bool {
    $htmlTypes = array_merge(self::GENERAL_SCOPE[Parser::HTML_NAMESPACE], self::LIST_ITEM_SCOPE);
    $boundary = self::GENERAL_SCOPE;
    $boundary[Parser::HTML_NAMESPACE] = $htmlTypes;
    return $this->hasElementInScopeHandler($target, $boundary);
}
/**
 * Reports whether any of the given targets is in button scope: the general
 * scope with BUTTON_SCOPE merged into its HTML-namespace entry.
 *
 * @param \DOMElement|string ...$target Elements (matched by identity) or element names
 */
public function hasElementInButtonScope(...$target): bool {
    $htmlTypes = array_merge(self::GENERAL_SCOPE[Parser::HTML_NAMESPACE], self::BUTTON_SCOPE);
    $boundary = self::GENERAL_SCOPE;
    $boundary[Parser::HTML_NAMESPACE] = $htmlTypes;
    return $this->hasElementInScopeHandler($target, $boundary);
}
/**
 * Reports whether any of the given targets is in table scope, using
 * TABLE_SCOPE as the list of scope-terminating element types.
 *
 * @param \DOMElement|string ...$target Elements (matched by identity) or element names
 */
public function hasElementInTableScope(...$target): bool {
    $boundary = self::TABLE_SCOPE;
    return $this->hasElementInScopeHandler($target, $boundary);
}
/**
 * Reports whether any of the given targets is in select scope. The handler
 * is called with its match flag set to false, so SELECT_SCOPE acts as an
 * exclusion list: any element type NOT in it terminates the search.
 *
 * @param \DOMElement|string ...$target Elements (matched by identity) or element names
 */
public function hasElementInSelectScope(...$target): bool {
    # The stack of open elements is said to have a particular element
    # in select scope when it has that element in the specific scope
    # consisting of all element types EXCEPT the following:
    #
    # optgroup in the HTML namespace
    # option in the HTML namespace
    $failWhenListed = false;
    return $this->hasElementInScopeHandler($target, self::SELECT_SCOPE, $failWhenListed);
}
/**
 * Shared implementation of the "has an element in a specific scope" checks.
 *
 * Walks the stack from the current node downwards. Returns true as soon as a
 * node matches one of the targets, and false as soon as a node's membership
 * in the scope list equals $matchType.
 *
 * @param array $targets Targets to look for: \DOMElement instances are matched by identity, anything else is compared to the name of null-namespace nodes
 * @param array $list Element names keyed by namespace URI; nodes with a null namespace are looked up under the HTML namespace
 * @param bool $matchType True to fail on nodes that ARE in the list, false to fail on nodes that are NOT in the list
 * @return bool Whether a target was found before the scope was terminated
 */
protected function hasElementInScopeHandler(array $targets, array $list, $matchType = true): bool {
    # The stack of open elements is said to have an element target node
    # in a specific scope consisting of a list of element types list
    # when the following algorithm terminates in a match state:
    # Initialize node to be the current node (the bottommost node of the stack).
    foreach ($this as $node) {
        # If node is the target node, terminate in a match state.
        foreach ($targets as $target) {
            if ($target instanceof \DOMElement) {
                if ($node->isSameNode($target)) {
                    return true;
                }
            } elseif ($node->namespaceURI === null && $node->nodeName === $target) {
                return true;
            }
        }
        # Otherwise, if node is one of the element types in list, terminate in a failure state.
        $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE;
        // Strict comparison: element names are strings and must never be
        // matched through PHP's loose numeric-string equality
        if (in_array($node->nodeName, $list[$ns] ?? [], true) === $matchType) {
            return false;
        }
        # Otherwise, set node to the previous entry in the stack of
        # open elements and return to step 2. (This will never fail,
        # since the loop will always terminate in the previous step
        # if the top of the stack — an html element — is reached.)
    }
    assert(false, new Exception(Exception::STACK_INVALID_STATE, (string)$this)); // @codeCoverageIgnore
    // With assertions compiled out, falling off the end of a function declared
    // to return bool would raise a TypeError; report a failure state instead
    return false; // @codeCoverageIgnore
} // @codeCoverageIgnore
/**
 * Recomputes the cached view of the stack: the element count, the current
 * node, the adjusted current node, and the name and namespace of both.
 */
protected function computeProperties(): void {
    $this->count = count($this->_storage);
    $this->currentNode = $this->top();
    # The adjusted current node is the context element if the parser was created by
    # the HTML fragment parsing algorithm and the stack of open elements has only one
    # element in it (fragment case); otherwise, the adjusted current node is the
    # current node.
    $isFragmentCase = $this->fragmentContext && $this->count === 1;
    $this->adjustedCurrentNode = $isFragmentCase ? $this->fragmentContext : $this->currentNode;
    // Name and namespace are null whenever the corresponding node is absent
    $this->currentNodeName = $this->currentNode ? $this->currentNode->nodeName : null;
    $this->currentNodeNamespace = $this->currentNode ? $this->currentNode->namespaceURI : null;
    $this->adjustedCurrentNodeName = $this->adjustedCurrentNode ? $this->adjustedCurrentNode->nodeName : null;
    $this->adjustedCurrentNodeNamespace = $this->adjustedCurrentNode ? $this->adjustedCurrentNode->namespaceURI : null;
}
/**
 * Renders the stack as element names joined by " < ", in iteration order.
 * Each name is preceded by its namespace prefix from Parser::NAMESPACE_MAP
 * and a space when that prefix is non-empty; unmapped namespaces use "?".
 */
public function __toString(): string {
    $labels = [];
    foreach ($this as $element) {
        $namespace = $element->namespaceURI ?? Parser::HTML_NAMESPACE;
        $label = Parser::NAMESPACE_MAP[$namespace] ?? "?";
        if ($label) {
            $label .= " ";
        }
        $labels[] = $label.$element->nodeName;
    }
    return implode(" < ", $labels);
}
}

215
lib/ParseError.php

@ -1,215 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Catalogue of HTML parse-error codes and their message templates, together
 * with the plumbing used to report them as E_USER_WARNING errors.
 */
class ParseError {
    // tokenization parse errors; these have been standardized
    const ENCODING_ERROR = 100;
    const UNEXPECTED_NULL_CHARACTER = 101;
    const UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME = 102;
    const EOF_BEFORE_TAG_NAME = 103;
    const INVALID_FIRST_CHARACTER_OF_TAG_NAME = 104;
    const MISSING_END_TAG_NAME = 105;
    const EOF_IN_TAG = 106;
    const EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT = 107;
    const UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME = 108;
    const DUPLICATE_ATTRIBUTE = 109;
    const UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME = 110;
    const MISSING_ATTRIBUTE_VALUE = 111;
    const UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE = 112;
    const MISSING_WHITESPACE_BETWEEN_ATTRIBUTES = 113;
    const UNEXPECTED_SOLIDUS_IN_TAG = 114;
    const CDATA_IN_HTML_CONTENT = 115;
    const INCORRECTLY_OPENED_COMMENT = 116;
    const ABRUPT_CLOSING_OF_EMPTY_COMMENT = 117;
    const EOF_IN_COMMENT = 118;
    const NESTED_COMMENT = 119;
    const INCORRECTLY_CLOSED_COMMENT = 120;
    const EOF_IN_DOCTYPE = 121;
    const MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME = 122;
    const MISSING_DOCTYPE_NAME = 123;
    const INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME = 124;
    const MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 125;
    const MISSING_DOCTYPE_PUBLIC_IDENTIFIER = 126;
    const MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 127;
    const ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER = 128;
    const MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 129;
    const MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 130;
    const MISSING_DOCTYPE_SYSTEM_IDENTIFIER = 131;
    const MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 132;
    const ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER = 133;
    const UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 134;
    const EOF_IN_CDATA = 135;
    const END_TAG_WITH_ATTRIBUTES = 136;
    const END_TAG_WITH_TRAILING_SOLIDUS = 137;
    const MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE = 138;
    const UNKNOWN_NAMED_CHARACTER_REFERENCE = 139;
    const ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE = 140;
    const NULL_CHARACTER_REFERENCE = 141;
    const CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE = 142;
    const SURROGATE_CHARACTER_REFERENCE = 143;
    const NONCHARACTER_CHARACTER_REFERENCE = 144;
    const CONTROL_CHARACTER_REFERENCE = 145;
    const SURROGATE_IN_INPUT_STREAM = 146;
    const NONCHARACTER_IN_INPUT_STREAM = 147;
    const CONTROL_CHARACTER_IN_INPUT_STREAM = 148;
    // tree construction parse errors; these have not been standardized, but html5lib's error names are likely to become standard in future
    const EXPECTED_DOCTYPE_BUT_GOT_START_TAG = 200;
    const EXPECTED_DOCTYPE_BUT_GOT_END_TAG = 201;
    const EXPECTED_DOCTYPE_BUT_GOT_CHARS = 202;
    const EXPECTED_DOCTYPE_BUT_GOT_EOF = 203;
    const UNKNOWN_DOCTYPE = 204;
    const UNEXPECTED_DOCTYPE = 205;
    const UNEXPECTED_START_TAG = 206;
    const UNEXPECTED_END_TAG = 207; // html5lib also uses 'adoption-agency-1.2' and 'adoption-agency-1.3' for this
    const NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS = 208;
    const UNEXPECTED_START_TAG_IMPLIES_END_TAG = 209;
    const UNEXPECTED_START_TAG_ALIAS = 210; // html5lib uses 'unexpected-start-tag-treated-as'
    const UNEXPECTED_CHAR = 211;
    const UNEXPECTED_EOF = 212;
    const UNEXPECTED_PARENT = 213;
    const INVALID_NAMESPACE_ATTRIBUTE_VALUE = 214;
    const FOSTERED_START_TAG = 215;
    const FOSTERED_END_TAG = 216;
    const FOSTERED_CHAR = 217;

    /**
     * Message template for every error code. The number of "%s" placeholders
     * in a template is the number of extra arguments emit() expects for it.
     */
    const MESSAGES = [
        self::EXPECTED_DOCTYPE_BUT_GOT_START_TAG => 'Expected DOCTYPE but got start tag <%s>',
        self::EXPECTED_DOCTYPE_BUT_GOT_END_TAG => 'Expected DOCTYPE but got end tag </%s>',
        self::EXPECTED_DOCTYPE_BUT_GOT_CHARS => 'Expected DOCTYPE but got characters',
        self::EXPECTED_DOCTYPE_BUT_GOT_EOF => 'Expected DOCTYPE but got end-of-file',
        self::UNKNOWN_DOCTYPE => 'Unknown DOCTYPE',
        // This code previously had no template, so reporting it could never succeed
        self::UNEXPECTED_DOCTYPE => 'Unexpected DOCTYPE',
        self::UNEXPECTED_START_TAG => 'Unexpected start tag <%s>',
        self::UNEXPECTED_END_TAG => 'Unexpected end tag </%s>',
        self::NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS => 'Trailing solidus in non-void HTML element start tag <%s>',
        self::UNEXPECTED_START_TAG_IMPLIES_END_TAG => 'Unexpected non-nesting start tag <%s> in nested context',
        self::UNEXPECTED_START_TAG_ALIAS => 'Start tag <%s> should be <%s>',
        self::UNEXPECTED_CHAR => 'Unexpected character data',
        self::UNEXPECTED_EOF => 'Unexpected end of file',
        self::UNEXPECTED_PARENT => 'Start tag <%s> not valid in parent <%s>',
        self::INVALID_NAMESPACE_ATTRIBUTE_VALUE => 'Invalid value for attribute "%s"; it must have value "%s" or be omitted',
        self::FOSTERED_START_TAG => 'Start tag <%s> moved to before table',
        self::FOSTERED_END_TAG => 'End tag </%s> moved to before table',
        self::FOSTERED_CHAR => 'Character moved to before table',
        self::ENCODING_ERROR => 'Corrupt encoding near byte position %s',
        self::UNEXPECTED_NULL_CHARACTER => 'Unexpected null character',
        self::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME => 'Unexpected "?" character instead of tag name',
        self::EOF_BEFORE_TAG_NAME => 'End-of-file before tag name',
        self::INVALID_FIRST_CHARACTER_OF_TAG_NAME => 'Invalid first character "%s" of tag name',
        self::MISSING_END_TAG_NAME => 'Missing end-tag name',
        self::EOF_IN_TAG => 'End-of-file in tag',
        self::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT => 'End-of-file in script (HTML comment-like) text',
        self::UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME => 'Unexpected equals sign before attribute name',
        self::DUPLICATE_ATTRIBUTE => 'Duplicate attribute "%s" in start tag',
        self::UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME => 'Unexpected character "%s" in attribute name',
        self::MISSING_ATTRIBUTE_VALUE => 'Missing attribute value',
        self::UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE => 'Unexpected character "%s" in unquoted attribute value',
        self::MISSING_WHITESPACE_BETWEEN_ATTRIBUTES => 'Missing whitespace between attributes',
        self::UNEXPECTED_SOLIDUS_IN_TAG => 'Unexpected solidus in tag',
        self::CDATA_IN_HTML_CONTENT => 'CDATA in HTML content',
        self::INCORRECTLY_OPENED_COMMENT => 'Incorrectly opened comment',
        self::ABRUPT_CLOSING_OF_EMPTY_COMMENT => 'Abrupt closing of empty comment',
        self::EOF_IN_COMMENT => 'End-of-file in comment',
        self::NESTED_COMMENT => 'Nested comment',
        self::INCORRECTLY_CLOSED_COMMENT => 'Incorrectly closed comment',
        self::EOF_IN_DOCTYPE => 'End-of-file in DOCTYPE',
        self::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME => 'Missing whitespace before DOCTYPE name',
        self::MISSING_DOCTYPE_NAME => 'Missing DOCTYPE name',
        self::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME => 'Invalid character sequence after DOCTYPE name',
        self::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD => 'Missing whitespace after DOCTYPE "PUBLIC" keyword',
        self::MISSING_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing DOCTYPE "PUBLIC" identifier',
        self::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing quote before DOCTYPE "PUBLIC" identifier',
        self::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER => 'Abrupt DOCTYPE "PUBLIC" identifier',
        self::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS => 'Missing whitespace between DOCTYPE "PUBLIC" and "SYSTEM" identifiers',
        self::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD => 'Missing whitespace after DOCTYPE "SYSTEM" keyword',
        self::MISSING_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing DOCTYPE "SYSTEM" identifier',
        self::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing quote before DOCTYPE "SYSTEM" identifier',
        self::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER => 'Abrupt DOCTYPE "SYSTEM" identifier',
        self::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER => 'Unexpected character "%s" after DOCTYPE "SYSTEM" identifier',
        self::EOF_IN_CDATA => 'End-of-file in CDATA section',
        self::END_TAG_WITH_ATTRIBUTES => 'End-tag with attributes',
        self::END_TAG_WITH_TRAILING_SOLIDUS => 'End-tag with trailing solidus',
        self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 'Missing semicolon after character reference',
        self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 'Unknown named character reference "%s"',
        self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 'Absence of digits in character reference',
        self::NULL_CHARACTER_REFERENCE => 'Null character reference',
        self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 'Character reference outside Unicode range',
        self::SURROGATE_CHARACTER_REFERENCE => 'Surrogate character reference',
        self::NONCHARACTER_CHARACTER_REFERENCE => 'Non-character character reference',
        self::CONTROL_CHARACTER_REFERENCE => 'Control-character character reference',
        self::SURROGATE_IN_INPUT_STREAM => 'Surrogate character in input stream',
        self::NONCHARACTER_IN_INPUT_STREAM => 'Non-character character in input stream',
        self::CONTROL_CHARACTER_IN_INPUT_STREAM => 'Control character in input stream',
    ];

    /**
     * Per-code offsets that ParseErrorEmitter passes to Data::whereIs() when
     * locating the error; codes not listed here use an offset of 0.
     */
    const REPORT_OFFSETS = [
        self::INCORRECTLY_OPENED_COMMENT => 1,
        self::SURROGATE_CHARACTER_REFERENCE => 1,
        self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 1,
        self::NONCHARACTER_CHARACTER_REFERENCE => 1,
        self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 1,
        self::NULL_CHARACTER_REFERENCE => 1,
        self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 1,
        self::CONTROL_CHARACTER_REFERENCE => 1,
        self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 1,
    ];

    /** Installs errorHandler() as the PHP error handler for E_USER_WARNING. */
    public function setHandler() {
        // Set the error handler and honor already-set error reporting rules.
        set_error_handler([$this, 'errorHandler'], \E_USER_WARNING);
    }

    /** Restores whichever error handler was active before setHandler(). */
    public function clearHandler() {
        restore_error_handler();
    }

    /**
     * Builds the full text of a parse-error report.
     *
     * @param string $file File name to mention in the message
     * @param int $line Line of the error; when zero, no position is appended
     * @param int $column Column of the error
     * @param int $code One of this class's error code constants
     * @param mixed ...$arg One value per "%s" placeholder in the code's template
     * @return string The formatted message
     */
    protected function prepareMessage(string $file, int $line, int $column, int $code, ...$arg): string {
        assert(isset(self::MESSAGES[$code]), new Exception(Exception::INVALID_CODE));
        $message = self::MESSAGES[$code];
        // Count the number of replacements needed in the message.
        $count = substr_count($message, '%s');
        // If the number of replacements don't match the arguments then oops.
        assert(count($arg) === $count, new Exception(Exception::INCORRECT_PARAMETERS_FOR_MESSAGE, $count));
        if ($count > 0) {
            // Convert newlines and tabs in the arguments to words to better
            // express what they are.
            $arg = array_map(function($value) {
                if ($value === "\n") {
                    return 'Newline';
                } elseif ($value === "\t") {
                    return 'Tab';
                } elseif ($value === null) {
                    return 'nothing';
                } else {
                    return $value;
                }
            }, $arg);
            // Go through each of the arguments and run sprintf on the strings.
            $message = sprintf($message, ...$arg);
        }
        // Wrap with preamble and location
        // TODO: the file path should be middle-elided when necessary so that
        // the message does not exceed 1024 bytes
        $message = sprintf("HTML5 Parse Error: \"%s\" in %s", $message, $file);
        if ($line) {
            $message .= sprintf(" on line %s, column %s", $line, $column);
        }
        return $message;
    }

    /**
     * Reports a parse error by triggering an E_USER_WARNING that carries the
     * prepared message.
     *
     * @return bool The result of trigger_error()
     */
    public function emit(string $file, int $line, int $column, int $code, ...$arg): bool {
        return trigger_error($this->prepareMessage($file, $line, $column, $code, ...$arg), \E_USER_WARNING);
    }

    /** Error-handler callback: prints the message followed by a newline. */
    public function errorHandler(int $code, string $message) {
        echo "$message\n";
    }
}

21
lib/ParseErrorDummy.php

@ -1,21 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Silent stand-in for ParseError: it installs no error handler and reports
 * nothing.
 */
class ParseErrorDummy extends ParseError {
    /** Deliberately a no-op: no error handler is installed. */
    public function setHandler() {
    }

    /** Deliberately a no-op: there is no handler to restore. */
    public function clearHandler() {
    }

    /**
     * Discards the error.
     *
     * @return bool Always false
     */
    public function emit(string $file, int $line, int $column, int $code, ...$arg): bool {
        $reported = false;
        return $reported;
    }
}

20
lib/ParseErrorEmitter.php

@ -1,20 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Gives a class a private error() helper which reports a parse error at a
 * position obtained from a Data object.
 */
trait ParseErrorEmitter {
    /** @var ParseError $errorHandler */
    private $errorHandler;

    /**
     * Emits the given parse error through the configured error handler.
     *
     * The Data object is $this when the using class is itself a Data,
     * otherwise the object's "data" property.
     *
     * @param int $code A ParseError error code
     * @param mixed ...$arg Values for the placeholders of the code's message
     * @return bool Whatever the handler's emit() returns
     */
    private function error(int $code, ...$arg): bool {
        if ($this instanceof Data) {
            $source = $this;
        } else {
            $source = $this->data ?? null;
        }
        assert($source instanceof Data);
        assert($this->errorHandler instanceof ParseError);
        // Some errors are reported at an offset from the current position
        $offset = ParseError::REPORT_OFFSETS[$code] ?? 0;
        [$line, $column] = $source->whereIs($offset);
        return $this->errorHandler->emit($source->filePath, $line, $column, $code, ...$arg);
    }
}

100
lib/Parser.php

@ -1,100 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Static entry points for parsing HTML documents and fragments into DOM
 * trees, plus a helper for fetching a file together with its encoding.
 */
class Parser {
    public static $fallbackEncoding = "windows-1252";

    public const NO_QUIRKS_MODE = 0;
    public const QUIRKS_MODE = 1;
    public const LIMITED_QUIRKS_MODE = 2;

    // Namespace constants
    public const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
    public const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
    public const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
    public const XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink';
    public const XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
    public const XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';

    /** Namespace URI => prefix; the HTML namespace maps to the empty string. */
    public const NAMESPACE_MAP = [
        self::HTML_NAMESPACE => "",
        self::MATHML_NAMESPACE => "math",
        self::SVG_NAMESPACE => "svg",
        self::XLINK_NAMESPACE => "xlink",
        self::XML_NAMESPACE => "xml",
        self::XMLNS_NAMESPACE => "xmlns",
    ];

    /**
     * Parses a string of HTML into a document.
     *
     * Parse errors are reported as E_USER_WARNING only when the current
     * error_reporting() level includes that bit; otherwise they are dropped.
     *
     * @param string $data The markup to parse
     * @param \DOMDocument|null $document Document to build into; a new \DOMDocument is created when null
     * @param string|null $encodingOrContentType Encoding hint handed to the Data decoder
     * @param \DOMElement|null $fragmentContext Context element when parsing a fragment
     * @param string|null $file File name used in error messages; "STDIN" when null
     * @return \DOMDocument The document that was built into
     */
    public static function parse(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?string $file = null): \DOMDocument {
        // Initialize the various classes needed for parsing
        $document = $document ?? new \DOMDocument;
        if ((error_reporting() & \E_USER_WARNING)) {
            $errorHandler = new ParseError;
        } else {
            $errorHandler = new ParseErrorDummy;
        }
        $decoder = new Data($data, $file ?? "STDIN", $errorHandler, $encodingOrContentType);
        $document->documentEncoding = $decoder->encoding;
        $stack = new OpenElementsStack($fragmentContext);
        $tokenizer = new Tokenizer($decoder, $stack, $errorHandler);
        $tokenList = $tokenizer->tokenize();
        $treeBuilder = new TreeBuilder($document, $decoder, $tokenizer, $tokenList, $errorHandler, $stack, new TemplateInsertionModesStack, $fragmentContext);
        // Override error handling
        $errorHandler->setHandler();
        try {
            // run the parser to completion
            $treeBuilder->constructTree();
        } finally {
            // Restore error handling
            $errorHandler->clearHandler();
        }
        return $document;
    }

    /**
     * Parses a string of HTML as a fragment and returns it as a document
     * fragment owned by $document.
     *
     * The return type is \DOMDocumentFragment because that is what
     * \DOMDocument::createDocumentFragment() produces for the plain
     * \DOMDocument created when no document is supplied; declaring a narrower
     * class would make that default path fail its own return-type check.
     *
     * @param string $data The markup to parse
     * @param \DOMDocument|null $document Owner document for the fragment; a new \DOMDocument is created when null
     * @param string|null $encodingOrContentType Encoding hint, passed through to parse()
     * @param \DOMElement|null $fragmentContext Context element; a "div" created from $document when null
     * @param string|null $file File name used in error messages
     * @return \DOMDocumentFragment Fragment holding imported copies of the parsed nodes
     */
    public static function parseFragment(string $data, ?\DOMDocument $document = null, ?string $encodingOrContentType = null, ?\DOMElement $fragmentContext = null, ?string $file = null): \DOMDocumentFragment {
        // Create the requisite parsing context if none was supplied
        $document = $document ?? new \DOMDocument;
        $tempDocument = new \DOMDocument;
        $fragmentContext = $fragmentContext ?? $document->createElement("div");
        // parse the fragment into the temporary document
        self::parse($data, $tempDocument, $encodingOrContentType, $fragmentContext, $file);
        // extract the nodes from the temp document into a fragment
        $fragment = $document->createDocumentFragment();
        foreach ($tempDocument->documentElement->childNodes as $node) {
            $node = $document->importNode($node, true);
            $fragment->appendChild($node);
        }
        return $fragment;
    }

    /**
     * Reads a file or URL and determines its character encoding.
     *
     * The encoding comes from $encodingOrContentType when it can be resolved;
     * failing that, for streams opened through the "http" wrapper, from the
     * first Content-Type response header.
     *
     * @param string $file Path or URL to open for reading
     * @param string|null $encodingOrContentType Encoding label or Content-Type value
     * @return array|null [contents, encoding], or null when the file cannot be opened
     */
    public static function fetchFile(string $file, ?string $encodingOrContentType = null): ?array {
        $f = fopen($file, "r");
        if (!$f) {
            return null;
        }
        try {
            $data = stream_get_contents($f);
            $encoding = Charset::fromCharset((string) $encodingOrContentType) ?? Charset::fromTransport((string) $encodingOrContentType);
            if (!$encoding) {
                $meta = stream_get_meta_data($f);
                if ($meta['wrapper_type'] === "http") {
                    // Try to find a Content-Type header-field
                    foreach ($meta['wrapper_data'] as $h) {
                        $h = explode(":", $h, 2);
                        if (count($h) === 2) {
                            if (preg_match("/^\s*Content-Type\s*$/i", $h[0])) {
                                // Try to get an encoding from it
                                $encoding = Charset::fromTransport($h[1]);
                                break;
                            }
                        }
                    }
                }
            }
        } finally {
            // Release the handle explicitly, even if something above throws
            fclose($f);
        }
        return [$data, $encoding];
    }
}

2
lib/DOM/ProcessingInstruction.php → lib/ProcessingInstruction.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class ProcessingInstruction extends \DOMProcessingInstruction {
use LeafNode, Moonwalk, ToString;

62
lib/Stack.php

@ -1,62 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Array-backed stack base class. Index 0 is the first element pushed, the
 * highest index is the top, and iteration runs from the top downwards.
 */
abstract class Stack implements \ArrayAccess, \Countable, \IteratorAggregate {
    /** @var array The stack's elements; the last entry is the top */
    protected $_storage = [];
    /** @var int Cached number of elements */
    protected $count = 0;

    /** Appends $value when $offset is null, otherwise stores it at $offset. */
    public function offsetSet($offset, $value) {
        assert($offset >= 0, new Exception(Exception::STACK_INVALID_INDEX, $offset));
        if ($offset !== null) {
            $this->_storage[$offset] = $value; // @codeCoverageIgnore
        } else {
            $this->_storage[] = $value;
        }
        $this->count = count($this->_storage);
    }

    /** Tells whether a non-null element is stored at $offset. */
    public function offsetExists($offset) {
        return isset($this->_storage[$offset]);
    }

    /** Removes the element at $offset, closing the gap it leaves. */
    public function offsetUnset($offset) {
        assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset));
        array_splice($this->_storage, $offset, 1);
        $this->count = count($this->_storage);
    }

    /** Returns the element stored at $offset. */
    public function offsetGet($offset) {
        assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset));
        return $this->_storage[$offset];
    }

    /** Returns the cached element count. */
    public function count(): int {
        return $this->count;
    }

    /** Yields index => element pairs from the top of the stack down to index 0. */
    public function getIterator(): \Traversable {
        $index = $this->count;
        while ($index-- > 0) {
            yield $index => $this->_storage[$index];
        }
    }

    /** Removes and returns the top element (null when the stack is empty). */
    public function pop() {
        $popped = array_pop($this->_storage);
        $this->count = max($this->count - 1, 0);
        return $popped;
    }

    /** Tells whether the stack holds no elements. */
    public function isEmpty(): bool {
        return empty($this->_storage);
    }

    /**
     * Returns the element $offset places below the top, or null when the
     * stack does not reach that deep.
     */
    public function top(int $offset = 0) {
        assert($offset >= 0, new Exception(Exception::STACK_INVALID_OFFSET, '<= 0'));
        $size = $this->count;
        if ($size > $offset) {
            return $this->_storage[$size - $offset - 1];
        }
        return null;
    }
}

2
lib/DOM/TemplateElement.php → lib/TemplateElement.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
/** Class specifically for template elements to handle its content property. */
class TemplateElement extends Element {

19
lib/TemplateInsertionModesStack.php

@ -1,19 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/** Stack which exposes its top entry through a virtual "currentMode" property. */
class TemplateInsertionModesStack extends Stack {
    /**
     * Magic getter. "currentMode" yields the top of the stack, or null when
     * the stack is empty; any other property name yields null.
     */
    public function __get($property) {
        assert($property === "currentMode", new \Exception("Property $property is invalid"));
        if ($property !== 'currentMode') {
            return null; // @codeCoverageIgnore
        }
        return $this->isEmpty() ? null : $this->top();
    }
}

2
lib/DOM/Text.php → lib/Text.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class Text extends \DOMText {
use LeafNode, Moonwalk, ToString;

120
lib/Token.php

@ -1,120 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/** Common base type of every token class. */
abstract class Token {
}

/** Base for tokens whose whole payload is a single string. */
abstract class DataToken extends Token {
    /** @var string The token's payload */
    public $data;

    /** @param string $data The token's payload */
    public function __construct(string $data) {
        $this->data = $data;
    }
}
/** Token for a DOCTYPE declaration. */
class DOCTYPEToken extends Token {
    public const NAME = "DOCTYPE token";

    # DOCTYPE tokens have a name, a public identifier,
    # a system identifier, and a force-quirks flag.
    # When a DOCTYPE token is created, its name,
    # public identifier, and system identifier must
    # be marked as missing (which is a distinct state
    # from the empty string), and the force-quirks flag
    # must be set to off (its other state is on).

    /** @var bool The force-quirks flag; off by default */
    public $forceQuirks = false;
    /** @var string|null The DOCTYPE name; null when missing */
    public $name;
    /** @var string|null The public identifier; null when missing */
    public $public;
    /** @var string|null The system identifier; null when missing */
    public $system;

    /**
     * Each argument defaults to null, which stands in for the distinct
     * "missing" state.
     */
    public function __construct(?string $name = null, ?string $public = null, ?string $system = null) {
        $this->system = $system;
        $this->public = $public;
        $this->name = $name;
    }
}
/** Token carrying character data. */
class CharacterToken extends DataToken {
    public const NAME = "Character token";
}

/** Character token subtype used for whitespace. */
class WhitespaceToken extends CharacterToken {
}

/** Character token subtype used for null characters. */
class NullCharacterToken extends CharacterToken {
}
/** Token for a comment; unlike other data tokens it may be created empty. */
class CommentToken extends DataToken {
    public const NAME = "Comment token";

    /** @param string $data The comment text; defaults to the empty string */
    public function __construct(string $data = '') {
        parent::__construct($data);
    }
}
/** Shared state and attribute lookup for start and end tag tokens. */
abstract class TagToken extends Token {
    # Start and end tag tokens have a tag name,
    # a self-closing flag, and a list of attributes,
    # each of which has a name and a value.
    # When a start or end tag token is created, its
    # self-closing flag must be unset (its other state
    # is that it be set), and its attributes list must be empty.

    /** @var string The tag name */
    public $name;
    /** @var string|null The namespace given at construction, if any */
    public $namespace;
    /** @var bool The self-closing flag */
    public $selfClosing;
    /** @var bool Starts out false; this class never changes it */
    public $selfClosingAcknowledged = false;
    /** @var array The tag's attributes; each entry exposes a "name" property */
    public $attributes = [];

    public function __construct(string $name, bool $selfClosing = false, ?string $namespace = null) {
        $this->name = $name;
        $this->namespace = $namespace;
        $this->selfClosing = $selfClosing;
    }

    /** Tells whether an attribute with exactly this name is present. */
    public function hasAttribute(string $name): bool {
        return $this->_getAttributeKey($name) !== null;
    }

    /** Returns the first attribute with exactly this name, or null if none. */
    public function getAttribute(string $name) {
        $key = $this->_getAttributeKey($name);
        return $this->attributes[$key] ?? null;
    }

    /** Returns the key of the first attribute named $name, or null if none. */
    private function _getAttributeKey(string $name) {
        foreach ($this->attributes as $index => $attr) {
            if ($attr->name === $name) {
                return $index;
            }
        }
        return null;
    }
}
/** Tag token for a start tag. */
class StartTagToken extends TagToken {
    public const NAME = "Start tag token";
}

/** Tag token for an end tag. */
class EndTagToken extends TagToken {
    public const NAME = "End tag token";
}

/** Token marking the end of the input. */
class EOFToken extends Token {
    public const NAME = "EOF token";
}
/** A single attribute attached to a tag token. */
class TokenAttr {
    /** @var string The name of the attribute */
    public $name;
    /** @var string The attribute's value */
    public $value;
    /** @var string|null The attribute's namespace. This is normally null but may be set during tree construction */
    public $namespace = null;

    /**
     * @param string $name The name of the attribute
     * @param string $value The attribute's value
     */
    public function __construct(string $name, string $value) {
        $this->value = $value;
        $this->name = $name;
    }
}

2
lib/DOM/TokenList.php → lib/TokenList.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
class TokenList implements \ArrayAccess, \Countable, \Iterator {
use MagicProperties;

3699
lib/Tokenizer.php

File diff suppressed because it is too large

4307
lib/TreeBuilder.php

File diff suppressed because it is too large

34
lib/ctype.php

@ -1,34 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
namespace MensBeam\HTML;
// This file adds shims for matching single characters
// using the same API as the ctype extension, if the
// extension is missing. They are not a complete
// replacement, as they are designed only to evaluate
// single characters
// Each shim answers true only for a string of exactly one byte that belongs
// to the relevant ASCII class; every other string yields false.
if (!extension_loaded("ctype")) {
    /** Single-character check for an ASCII letter or digit. */
    function ctype_alnum(string $str): bool {
        return strlen($str) === 1
            && strpos("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", $str) !== false;
    }

    /** Single-character check for an ASCII letter. */
    function ctype_alpha(string $str): bool {
        return strlen($str) === 1
            && strpos("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", $str) !== false;
    }

    /** Single-character check for an uppercase ASCII letter. */
    function ctype_upper(string $str): bool {
        return strlen($str) === 1
            && strpos("ABCDEFGHIJKLMNOPQRSTUVWXYZ", $str) !== false;
    }

    /** Single-character check for an ASCII decimal digit. */
    function ctype_digit(string $str): bool {
        return strlen($str) === 1
            && strpos("0123456789", $str) !== false;
    }

    /** Single-character check for an ASCII hexadecimal digit. */
    function ctype_xdigit(string $str): bool {
        return strlen($str) === 1
            && strpos("abcdefABCDEF0123456789", $str) !== false;
    }
}

2
lib/DOM/traits/ContainerNode.php → lib/traits/ContainerNode.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// Node in the DOM spec is dirty. Many nodes which inherit from it inherit
// methods it cannot use which all check for this and throw exceptions. This is

2
lib/DOM/traits/DocumentOrElement.php → lib/traits/DocumentOrElement.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// This exists because the DOM spec for some stupid reason doesn't give
// DocumentFragment some methods.

2
lib/DOM/traits/EscapeString.php → lib/traits/EscapeString.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
use MensBeam\Intl\Encoding\UTF8;

2
lib/DOM/traits/LeafNode.php → lib/traits/LeafNode.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// Node in the DOM spec is dirty. Many nodes which inherit from it inherit
// methods it cannot use which all check for this and throw exceptions. This is

2
lib/DOM/traits/MagicProperties.php → lib/traits/MagicProperties.php

@ -5,7 +5,7 @@
*/
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
/**
* Getters and setters in PHP sucks. Instead of having getter and setter

2
lib/DOM/traits/Moonwalk.php → lib/traits/Moonwalk.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
trait Moonwalk {
/** Generator which walks up the DOM. Nonstandard. */

2
lib/DOM/traits/MoonwalkShallow.php → lib/traits/MoonwalkShallow.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
trait MoonwalkShallow {
/**

2
lib/DOM/traits/Node.php → lib/traits/Node.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
// Extensions to PHP's DOM cannot inherit from an extended Node parent, so a
// trait is the next best thing...

2
lib/DOM/traits/ParentNode.php → lib/traits/ParentNode.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
if (version_compare(\PHP_VERSION, '8.0', '>=')) {
# 4.2.6. Mixin ParentNode

2
lib/DOM/traits/ToString.php → lib/traits/ToString.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
trait ToString {
public function __toString(): string {

2
lib/DOM/traits/Walk.php → lib/traits/Walk.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
trait Walk {
/** Generator which walks down the DOM. Nonstandard. */

2
lib/DOM/traits/WalkShallow.php → lib/traits/WalkShallow.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
trait WalkShallow {
/**

2
tests/bootstrap.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
namespace MensBeam\HTML\DOM;
const NS_BASE = __NAMESPACE__."\\";
define(NS_BASE."BASE", dirname(__DIR__).DIRECTORY_SEPARATOR);

10
tests/cases/TestCharset.php

@ -4,12 +4,12 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\Charset;
use MensBeam\HTML\DOM\Charset;
/**
* @covers \MensBeam\HTML\Charset
* @covers \MensBeam\HTML\DOM\Charset
*/
class TestCharset extends \PHPUnit\Framework\TestCase {
/** @dataProvider provideCharsets */
@ -73,8 +73,8 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
$tests = [];
$blacklist = [];
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
if (!in_array(basename($file), $blacklist)) {
$tests[] = $file;

34
tests/cases/TestDOM.php

@ -4,16 +4,16 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\Document;
use MensBeam\HTML\Parser;
use MensBeam\HTML\TemplateElement;
use MensBeam\HTML\DOM\Document;
use MensBeam\HTML\DOM\Parser;
use MensBeam\HTML\DOM\TemplateElement;
class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideNamespacedElements
* @covers \MensBeam\HTML\Document::createElementNS
* @covers \MensBeam\HTML\DOM\Document::createElementNS
*/
public function testCreateNamespacedElements(?string $nsIn, string $nameIn, ?string $nsOut, string $local, string $prefix): void {
$d = new Document;
@ -43,7 +43,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
}
/**
* @dataProvider provideBareElements
* @covers \MensBeam\HTML\Document::createElement
* @covers \MensBeam\HTML\DOM\Document::createElement
*/
public function testCreateBareElements(string $nameIn, $nameOut): void {
$d = new Document;
@ -62,7 +62,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
];
}
/** @covers \MensBeam\HTML\Document::createElementNS */
/** @covers \MensBeam\HTML\DOM\Document::createElementNS */
public function testCreateTemplateElements(): void {
$d = new Document;
$t = $d->createElement("template");
@ -87,7 +87,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideNamespacedAttributeCreations
* @covers \MensBeam\HTML\Document::createAttributeNS
* @covers \MensBeam\HTML\DOM\Document::createAttributeNS
*/
public function testCreateNamespacedAttributes(?string $nsIn, string $nameIn, string $local, string $prefix): void {
$d = new Document;
@ -114,7 +114,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideBareAttributeCreations
* @covers \MensBeam\HTML\Document::createAttribute
* @covers \MensBeam\HTML\DOM\Document::createAttribute
*/
public function testCreateBareAttributes(string $nameIn, string $nameOut): void {
$d = new Document;
@ -135,7 +135,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideNamespacedAttributeSettings
* @covers \MensBeam\HTML\Element::setAttributeNS
* @covers \MensBeam\HTML\DOM\Element::setAttributeNS
*/
public function testSetNamespoacedAttributes(?string $elementNS, ?string $attrNS, string $nameIn, string $nameOut): void {
$d = new Document;
@ -171,7 +171,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideBareAttributeSettings
* @covers \MensBeam\HTML\Element::setAttribute
* @covers \MensBeam\HTML\DOM\Element::setAttribute
*/
public function testSetBareAttributes(?string $elementNS, string $nameIn, string $nameOut): void {
$d = new Document;
@ -201,8 +201,8 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideAttributeNodeSettings
* @covers \MensBeam\HTML\Element::setAttributeNode
* @covers \MensBeam\HTML\Element::setAttributeNodeNS
* @covers \MensBeam\HTML\DOM\Element::setAttributeNode
* @covers \MensBeam\HTML\DOM\Element::setAttributeNodeNS
*/
public function testSetAttributeNodes(bool $ns, ?string $elementNS, ?string $attrNS, string $name): void {
$d = new Document;
@ -259,9 +259,9 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
}
/**
* @covers \MensBeam\HTML\Element::hasAttribute
* @covers \MensBeam\HTML\Element::getAttribute
* @covers \MensBeam\HTML\Element::getAttributeNS
* @covers \MensBeam\HTML\DOM\Element::hasAttribute
* @covers \MensBeam\HTML\DOM\Element::getAttribute
* @covers \MensBeam\HTML\DOM\Element::getAttributeNS
*/
public function testCheckForAttribute(): void {
$d = new Document;
@ -305,7 +305,7 @@ class TestDOM extends \PHPUnit\Framework\TestCase {
$this->assertSame("ack", $e->getAttributeNS("fake_ns", "eek"));
}
/** @covers \MensBeam\HTML\Element::__get */
/** @covers \MensBeam\HTML\DOM\Element::__get */
public function testGetInnerAndOuterHtml(): void {
$d = new Document;
$d->appendChild($d->createElement("html"));

22
tests/cases/TestSerializer.php

@ -4,19 +4,19 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\Document;
use MensBeam\HTML\Parser;
use MensBeam\HTML\DOM\Document;
use MensBeam\HTML\DOM\Parser;
/**
* @covers \MensBeam\HTML\Document
* @covers \MensBeam\HTML\DocumentFragment
* @covers \MensBeam\HTML\Element
* @covers \MensBeam\HTML\TemplateElement
* @covers \MensBeam\HTML\Comment
* @covers \MensBeam\HTML\Text
* @covers \MensBeam\HTML\ProcessingInstruction
* @covers \MensBeam\HTML\DOM\Document
* @covers \MensBeam\HTML\DOM\DocumentFragment
* @covers \MensBeam\HTML\DOM\Element
* @covers \MensBeam\HTML\DOM\TemplateElement
* @covers \MensBeam\HTML\DOM\Comment
* @covers \MensBeam\HTML\DOM\Text
* @covers \MensBeam\HTML\DOM\ProcessingInstruction
*/
class TestSerializer extends \PHPUnit\Framework\TestCase {
/** @dataProvider provideStandardSerializerTests */
@ -28,7 +28,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
public function provideStandardSerializerTests(): iterable {
$blacklist = [];
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
$index = 0;
$l = 0;

48
tests/cases/TestTokenizer.php

@ -4,31 +4,31 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\Data;
use MensBeam\HTML\EOFToken;
use MensBeam\HTML\OpenElementsStack;
use MensBeam\HTML\ParseError;
use MensBeam\HTML\Tokenizer;
use MensBeam\HTML\CharacterToken;
use MensBeam\HTML\CommentToken;
use MensBeam\HTML\DOCTYPEToken;
use MensBeam\HTML\EndTagToken;
use MensBeam\HTML\NullCharacterToken;
use MensBeam\HTML\StartTagToken;
use MensBeam\HTML\TokenAttr;
use MensBeam\HTML\WhitespaceToken;
use MensBeam\HTML\DOM\Data;
use MensBeam\HTML\DOM\EOFToken;
use MensBeam\HTML\DOM\OpenElementsStack;
use MensBeam\HTML\DOM\ParseError;
use MensBeam\HTML\DOM\Tokenizer;
use MensBeam\HTML\DOM\CharacterToken;
use MensBeam\HTML\DOM\CommentToken;
use MensBeam\HTML\DOM\DOCTYPEToken;
use MensBeam\HTML\DOM\EndTagToken;
use MensBeam\HTML\DOM\NullCharacterToken;
use MensBeam\HTML\DOM\StartTagToken;
use MensBeam\HTML\DOM\TokenAttr;
use MensBeam\HTML\DOM\WhitespaceToken;
/**
* @covers \MensBeam\HTML\Data
* @covers \MensBeam\HTML\Tokenizer
* @covers \MensBeam\HTML\CharacterToken
* @covers \MensBeam\HTML\CommentToken
* @covers \MensBeam\HTML\DataToken
* @covers \MensBeam\HTML\TagToken
* @covers \MensBeam\HTML\DOCTYPEToken
* @covers \MensBeam\HTML\TokenAttr
* @covers \MensBeam\HTML\DOM\Data
* @covers \MensBeam\HTML\DOM\Tokenizer
* @covers \MensBeam\HTML\DOM\CharacterToken
* @covers \MensBeam\HTML\DOM\CommentToken
* @covers \MensBeam\HTML\DOM\DataToken
* @covers \MensBeam\HTML\DOM\TagToken
* @covers \MensBeam\HTML\DOM\DOCTYPEToken
* @covers \MensBeam\HTML\DOM\TokenAttr
*/
class TestTokenizer extends \PHPUnit\Framework\TestCase {
const STATE_MAP = [
@ -87,8 +87,8 @@ class TestTokenizer extends \PHPUnit\Framework\TestCase {
$tests = [];
$blacklist = ["xmlViolation.test"];
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/tokenizer/*.test", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
if (!in_array(basename($file), $blacklist)) {
$tests[] = $file;

44
tests/cases/TestTreeConstructor.php

@ -4,31 +4,31 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\Data;
use MensBeam\HTML\LoopException;
use MensBeam\HTML\NotImplementedException;
use MensBeam\HTML\OpenElementsStack;
use MensBeam\HTML\ParseError;
use MensBeam\HTML\Parser;
use MensBeam\HTML\TemplateInsertionModesStack;
use MensBeam\HTML\Tokenizer;
use MensBeam\HTML\TreeBuilder;
use MensBeam\HTML\DOM\Data;
use MensBeam\HTML\DOM\LoopException;
use MensBeam\HTML\DOM\NotImplementedException;
use MensBeam\HTML\DOM\OpenElementsStack;
use MensBeam\HTML\DOM\ParseError;
use MensBeam\HTML\DOM\Parser;
use MensBeam\HTML\DOM\TemplateInsertionModesStack;
use MensBeam\HTML\DOM\Tokenizer;
use MensBeam\HTML\DOM\TreeBuilder;
/**
* @covers \MensBeam\HTML\Document
* @covers \MensBeam\HTML\Element
* @covers \MensBeam\HTML\Tokenizer
* @covers \MensBeam\HTML\TreeBuilder
* @covers \MensBeam\HTML\ActiveFormattingElementsList
* @covers \MensBeam\HTML\TemplateInsertionModesStack
* @covers \MensBeam\HTML\OpenElementsStack
* @covers \MensBeam\HTML\Stack
* @covers \MensBeam\HTML\TagToken
* @covers \MensBeam\HTML\DOM\Document
* @covers \MensBeam\HTML\DOM\Element
* @covers \MensBeam\HTML\DOM\Tokenizer
* @covers \MensBeam\HTML\DOM\TreeBuilder
* @covers \MensBeam\HTML\DOM\ActiveFormattingElementsList
* @covers \MensBeam\HTML\DOM\TemplateInsertionModesStack
* @covers \MensBeam\HTML\DOM\OpenElementsStack
* @covers \MensBeam\HTML\DOM\Stack
* @covers \MensBeam\HTML\DOM\TagToken
*/
class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
use \MensBeam\HTML\EscapeString;
use \MensBeam\HTML\DOM\EscapeString;
protected $out;
protected $depth;
@ -379,8 +379,8 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
public function provideStandardTreeTests(): iterable {
$blacklist = [];
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
$index = 0;
$l = 0;

Loading…
Cancel
Save