From 2e2563cf15b037445948a3a2700b75966a0b3783 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Sat, 23 Oct 2021 17:04:34 -0400 Subject: [PATCH] Document serializer --- README.md | 44 ++++++++++++++++++-- composer.lock | 71 +------------------------------- lib/Parser.php | 5 +-- lib/Parser/Serializer.php | 6 +-- tests/cases/TestSerializer.php | 6 +-- vendor-bin/phpunit/composer.lock | 15 +++---- 6 files changed, 57 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index 46d280f..a39be0e 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # HTML-Parser -A modern, accurate HTML parser for PHP. +A modern, accurate HTML parser and serializer for PHP. ## Usage ### Parsing documents ```php -public MensBeam\HTML\Parser::parse( +public static MensBeam\HTML\Parser::parse( string $data, ?string $encodingOrContentType = null. ?MensBeam\HTML\Parser\Config $config = null @@ -26,7 +26,7 @@ Extra configuration parameters may be given to the parser by passing a `MensBeam ### Parsing fragments ```php -public MensBeam\HTML\Parser::parse( +public static MensBeam\HTML\Parser::parseFragment( DOMElement $contextElement, int $quirksMode, string $data, @@ -41,7 +41,33 @@ If the "quirks mode" property of the document is not know, using `Parser::NO_QUI Unlike the `parse()` method, the `parseFragment()` method returns a `DOMDocumentFragment` object belonging to `$contextElement`'s owner document. -### Examples +### Serializing nodes + +```php +public static MensBeam\HTML\Parser::serialize(DOMNode $node): string +``` + +```php +public static MensBeam\HTML\Parser::serializeInner(DOMNode $node): string +``` + +The `MensBeam\HTML\Parser::serialize` method can be used to convert most `DOMNode` objects into strings, using the basic algorithm defined in the HTML specification. 
Nodes of the following types can be successfully serialized: + +- `DOMDocument` +- `DOMElement` +- `DOMText` +- `DOMComment` +- `DOMDocumentFragment` +- `DOMDocumentType` +- `DOMProcessingInstruction` + +Similarly, the `MensBeam\HTML\Parser::serializeInner` method can be used to convert the children of non-leaf `DOMNode` objects into strings, using the basic algorithm defined in the HTML specification. Children of nodes of the following types can be successfully serialized: + +- `DOMDocument` +- `DOMElement` +- `DOMDocumentFragment` + +## Examples - Parsing a document with unknown encoding: @@ -110,6 +136,16 @@ Unlike the `parse()` method, the `parseFragment()` method returns a `DOMDocument echo $mathFragment->firstChild->namespaceURI; // prints "http://www.w3.org/1998/Math/MathML" ``` +- Serializing documents and elements: + + ```php + use MensBeam\HTML\Parser; + + $document = Parser::parse("Ook<p>Eek")->document; + echo Parser::serialize($document); // prints "<html><head></head><body>Ook<p>Eek</p></body></html>" + echo Parser::serializeInner($document->getElementsByTagName("body")[0]); // prints "Ook<p>Eek</p>" + ``` + ## Configuration The `MensBeam\HTML\Parser\Config` class is used as a container for configuration parameters for the parser. We have tried to use rational defaults, but some parameters are nevertheless configurable: diff --git a/composer.lock b/composer.lock index 442291c..7e4d7b3 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "10ef46b0f5366da9ef3b006f3bfc2e8e", + "content-hash": "3bd2c9527ab034f6d6c69b494c92178a", "packages": [ { "name": "mensbeam/intl", @@ -216,75 +216,6 @@ "source": "https://github.com/bamarni/composer-bin-plugin/tree/master" }, "time": "2020-05-03T08:27:20+00:00" - }, - { - "name": "masterminds/html5", - "version": "2.7.5", - "source": { - "type": "git", - "url": "https://github.com/Masterminds/html5-php.git", - "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab", - "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab", - "shasum": "" - }, - "require": { - "ext-ctype": "*", - "ext-dom": "*", - "ext-libxml": "*", - "php": ">=5.3.0" - }, - "require-dev": { - "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.7-dev" - } - }, - "autoload": { - "psr-4": { - "Masterminds\\": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Matt Butcher", - "email": "technosophos@gmail.com" - }, - { - "name": "Matt Farina", - "email": "matt@mattfarina.com" - }, - { - "name": "Asmir Mustafic", - "email": "goetas@gmail.com" - } - ], - "description": "An HTML5 parser and serializer.", - "homepage": "http://masterminds.github.io/html5-php", - "keywords": [ - "HTML5", - "dom", - "html", - 
"parser", - "querypath", - "serializer", - "xml" - ], - "support": { - "issues": "https://github.com/Masterminds/html5-php/issues", - "source": "https://github.com/Masterminds/html5-php/tree/2.7.5" - }, - "time": "2021-07-01T14:25:37+00:00" } ], "aliases": [], diff --git a/lib/Parser.php b/lib/Parser.php index e36d620..03259c3 100644 --- a/lib/Parser.php +++ b/lib/Parser.php @@ -6,7 +6,6 @@ declare(strict_types=1); namespace MensBeam\HTML; -use MensBeam\HTML\Parser\Charset; use MensBeam\HTML\Parser\Data; use MensBeam\HTML\Parser\ParseError; use MensBeam\HTML\Parser\Config; @@ -17,8 +16,9 @@ use MensBeam\HTML\Parser\TemplateInsertionModesStack; use MensBeam\HTML\Parser\Tokenizer; use MensBeam\HTML\Parser\TreeConstructor; use MensBeam\HTML\Parser\Output; +use MensBeam\HTML\Parser\Serializer; -class Parser { +class Parser extends Serializer { public const NO_QUIRKS_MODE = 0; public const QUIRKS_MODE = 1; public const LIMITED_QUIRKS_MODE = 2; @@ -47,7 +47,6 @@ class Parser { * @param \MensBeam\HTML\Parser\Config|null $config The configuration parameters to use, if any */ public static function parse(string $data, ?string $encodingOrContentType = null, ?Config $config = null): Output { - // parse the document return static::parseDocumentOrFragment($data, $encodingOrContentType, null, null, $config ?? 
new Config); } diff --git a/lib/Parser/Serializer.php b/lib/Parser/Serializer.php index a3c430e..2fbaa3d 100644 --- a/lib/Parser/Serializer.php +++ b/lib/Parser/Serializer.php @@ -18,7 +18,7 @@ abstract class Serializer { * * @param \DOMDocument|\DOMElement|\DOMText|\DOMComment|\DOMProcessingInstruction|\DOMDocumentFragment|\DOMDocumentType $node The node to serialize */ - public static function serializeOuter(\DOMNode $node): string { + public static function serialize(\DOMNode $node): string { $s = ""; $stack = []; $n = $node; @@ -220,9 +220,9 @@ abstract class Serializer { } if ($node instanceof \DOMElement || $node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment) { # For each child node of the node, in tree order, run the following steps: - // NOTE: the steps in question are implemented in the "serializeOuter" routine + // NOTE: the steps in question are implemented in the "serialize" routine foreach ($node->childNodes as $n) { - $s .= self::serializeOuter($n); + $s .= self::serialize($n); } } else { throw new Exception(Exception::UNSUPPORTED_NODE_TYPE, [get_class($node)]); diff --git a/tests/cases/TestSerializer.php b/tests/cases/TestSerializer.php index 522867d..b5ea55a 100644 --- a/tests/cases/TestSerializer.php +++ b/tests/cases/TestSerializer.php @@ -21,7 +21,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase { /** @dataProvider provideStandardTreeTests */ public function testStandardTreeTests(array $data, bool $fragment, string $exp): void { $node = $this->buildTree($data, $fragment); - $this->assertSame($exp, Serializer::serializeOuter($node)); + $this->assertSame($exp, Serializer::serialize($node)); } public function provideStandardTreeTests(): iterable { @@ -53,7 +53,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase { $exp1 = $exp; $exp2 = ""; $this->assertSame($exp1, Serializer::serializeInner($t)); - $this->assertSame($exp2, Serializer::serializeOuter($t)); + $this->assertSame($exp2, Serializer::serialize($t)); 
} public function provideTemplateTests(): iterable { @@ -140,7 +140,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase { $d = new \DOMDocument; $a = $d->createAttribute("oops"); $this->expectExceptionObject(new Exception(Exception::UNSUPPORTED_NODE_TYPE, [\DOMAttr::class])); - Serializer::serializeOuter($a); + Serializer::serialize($a); } public function testInnerSerializeAnInvalidNode(): void { diff --git a/vendor-bin/phpunit/composer.lock b/vendor-bin/phpunit/composer.lock index d6d4c14..2f525c8 100644 --- a/vendor-bin/phpunit/composer.lock +++ b/vendor-bin/phpunit/composer.lock @@ -355,16 +355,16 @@ }, { "name": "phpdocumentor/reflection-docblock", - "version": "5.2.2", + "version": "5.3.0", "source": { "type": "git", "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", - "reference": "069a785b2141f5bcf49f3e353548dc1cce6df556" + "reference": "622548b623e81ca6d78b721c5e029f4ce664f170" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/069a785b2141f5bcf49f3e353548dc1cce6df556", - "reference": "069a785b2141f5bcf49f3e353548dc1cce6df556", + "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/622548b623e81ca6d78b721c5e029f4ce664f170", + "reference": "622548b623e81ca6d78b721c5e029f4ce664f170", "shasum": "" }, "require": { @@ -375,7 +375,8 @@ "webmozart/assert": "^1.9.1" }, "require-dev": { - "mockery/mockery": "~1.3.2" + "mockery/mockery": "~1.3.2", + "psalm/phar": "^4.8" }, "type": "library", "extra": { @@ -405,9 +406,9 @@ "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.", "support": { "issues": "https://github.com/phpDocumentor/ReflectionDocBlock/issues", - "source": "https://github.com/phpDocumentor/ReflectionDocBlock/tree/master" + "source": "https://github.com/phpDocumentor/ReflectionDocBlock/tree/5.3.0" }, - "time": "2020-09-03T19:13:55+00:00" + 
"time": "2021-10-19T17:43:47+00:00" }, { "name": "phpdocumentor/type-resolver",