Browse Source

More tests, updated name coercion

wrapper-classes
Dustin Wilson 3 years ago
parent
commit
7940aeac97
  1. 9
      composer.json
  2. 57
      composer.lock
  3. 2
      lib/AbstractDocument.php
  4. 6
      lib/Document.php
  5. 2
      lib/Element.php
  6. 21
      lib/traits/DocumentOrElement.php
  7. 58
      lib/traits/EscapeString.php
  8. 1
      lib/traits/Moonwalk.php
  9. 1
      lib/traits/Node.php
  10. 1
      lib/traits/ParentNode.php
  11. 1
      lib/traits/ToString.php
  12. 1
      lib/traits/Walk.php
  13. 47
      tests/cases/Document/TestDocument.php

9
composer.json

@ -35,7 +35,11 @@
"autoload-dev": {
"psr-4": {
"MensBeam\\HTML\\DOM\\Test\\": "tests/lib/",
"MensBeam\\HTML\\DOM\\TestCase\\": "tests/cases/"
"MensBeam\\HTML\\DOM\\TestCase\\": [
"tests/cases/",
"tests/cases/Document",
"tests/cases/Serializer"
]
}
},
"repositories": [
@ -46,6 +50,7 @@
],
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"daux/daux.io": "^0.16.0"
"daux/daux.io": "^0.16.0",
"mikey179/vfsstream": "^1.6"
}
}

57
composer.lock

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "fa97f8577cdf49f5f1dc348f28cf141b",
"content-hash": "a7994e69dd088a79ef9b73cb8f581abd",
"packages": [
{
"name": "mensbeam/html-parser",
@ -12,7 +12,7 @@
"source": {
"type": "git",
"url": "mensbeam-gitea:MensBeam/HTML-Parser.git",
"reference": "a88a3ae1074c368592d2c6c1176eac5a4ad2ac02"
"reference": "6ee9f0ccc22f0f1094eda44a89069c5f2ef2280f"
},
"require": {
"ext-dom": "*",
@ -72,7 +72,7 @@
}
],
"description": "Parses modern HTML text into a PHP DOMDocument",
"time": "2021-10-01T02:59:03+00:00"
"time": "2021-10-02T02:46:52+00:00"
},
{
"name": "mensbeam/intl",
@ -762,6 +762,57 @@
},
"time": "2020-12-25T05:00:37+00:00"
},
{
"name": "mikey179/vfsstream",
"version": "v1.6.10",
"source": {
"type": "git",
"url": "https://github.com/bovigo/vfsStream.git",
"reference": "250c0825537d501e327df879fb3d4cd751933b85"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/bovigo/vfsStream/zipball/250c0825537d501e327df879fb3d4cd751933b85",
"reference": "250c0825537d501e327df879fb3d4cd751933b85",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.5|^5.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.6.x-dev"
}
},
"autoload": {
"psr-0": {
"org\\bovigo\\vfs\\": "src/main/php"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"authors": [
{
"name": "Frank Kleine",
"homepage": "http://frankkleine.de/",
"role": "Developer"
}
],
"description": "Virtual file system to mock the real file system in unit tests.",
"homepage": "http://vfs.bovigo.org/",
"support": {
"issues": "https://github.com/bovigo/vfsStream/issues",
"source": "https://github.com/bovigo/vfsStream/tree/master",
"wiki": "https://github.com/bovigo/vfsStream/wiki"
},
"time": "2021-09-25T08:05:01+00:00"
},
{
"name": "psr/container",
"version": "1.1.1",

2
lib/AbstractDocument.php

@ -10,5 +10,5 @@ namespace MensBeam\HTML\DOM;
// Exists so Document can extend methods from its traits.
abstract class AbstractDocument extends \DOMDocument {
use DocumentOrElement, EscapeString, MagicProperties, ParentNode, Walk;
use DocumentOrElement, MagicProperties, ParentNode, Walk;
}

6
lib/Document.php

@ -386,7 +386,7 @@ class Document extends AbstractDocument {
}
public function save($filename, $options = null) {
return file_put_contents($filename, $this->serialize());
return file_put_contents($filename, $this->saveHTML());
}
public function saveHTML(\DOMNode $node = null): string {
@ -432,11 +432,11 @@ class Document extends AbstractDocument {
}
public function validate(): bool {
return true;
throw new DOMException(DOMException::NOT_SUPPORTED, __CLASS__ . ' is only meant for HTML');
}
public function xinclude($options = null): bool {
return false;
throw new DOMException(DOMException::NOT_SUPPORTED, __CLASS__ . ' is only meant for HTML');
}

2
lib/Element.php

@ -11,7 +11,7 @@ use MensBeam\HTML\Parser;
class Element extends \DOMElement {
use DocumentOrElement, EscapeString, MagicProperties, Moonwalk, ParentNode, ToString, Walk;
use DocumentOrElement, MagicProperties, Moonwalk, ParentNode, ToString, Walk;
protected $_classList;

21
lib/traits/DocumentOrElement.php

@ -7,10 +7,14 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\HTML\Parser\NameCoercion;
// This exists because the DOM spec for some stupid reason doesn't give
// DocumentFragment some methods.
trait DocumentOrElement {
use NameCoercion;
public function getElementsByClassName(string $classNames): \DOMNodeList {
# The list of elements with class names classNames for a node root is the
# HTMLCollection returned by the following algorithm:
@ -57,4 +61,21 @@ trait DocumentOrElement {
return ($isDocument) ? $document->xpath->query($query) : $document->xpath->query($query, $this);
}
/**
 * Escapes a string for HTML serialization.
 *
 * "&" and U+00A0 NO-BREAK SPACE are always replaced by "&amp;" and "&nbsp;".
 * In attribute mode the double quote is additionally replaced by "&quot;";
 * otherwise "<" and ">" are replaced by "&lt;" and "&gt;".
 *
 * @param string $string    The text to escape
 * @param bool   $attribute Whether to escape for use inside an attribute value
 * @return string The escaped text
 */
protected function escapeString(string $string, bool $attribute = false): string {
    # Escaping a string (for the purposes of the algorithm above) consists of
    # running the following steps:
    # 1. Replace any occurrence of the "&" character by the string "&amp;".
    # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
    #    string "&nbsp;".
    // "&" is listed first so the ampersands introduced by "&nbsp;" are not
    // escaped a second time.
    $string = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string);
    # 3. If the algorithm was invoked in the attribute mode, replace any
    #    occurrences of the """ character by the string "&quot;".
    # 4. If the algorithm was not invoked in the attribute mode, replace any
    #    occurrences of the "<" character by the string "&lt;", and any
    #    occurrences of the ">" character by the string "&gt;".
    return ($attribute) ? str_replace('"', '&quot;', $string) : str_replace(['<', '>'], ['&lt;', '&gt;'], $string);
}
}

58
lib/traits/EscapeString.php

@ -1,58 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017, Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Intl\Encoding\UTF8;
trait EscapeString {
    /**
     * Escapes text for HTML serialization.
     *
     * @param string $string    The text to escape
     * @param bool   $attribute True to escape for an attribute value, false for text content
     * @return string The escaped text
     */
    protected function escapeString(string $string, bool $attribute = false): string {
        # Escaping a string (for the purposes of the algorithm above) consists of
        # running the following steps:
        # 1. Replace any occurrence of the "&" character by the string "&amp;".
        # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
        #    string "&nbsp;".
        $escaped = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string);

        # 3. If the algorithm was invoked in the attribute mode, replace any
        #    occurrences of the """ character by the string "&quot;".
        if ($attribute) {
            return str_replace('"', '&quot;', $escaped);
        }

        # 4. If the algorithm was not invoked in the attribute mode, replace any
        #    occurrences of the "<" character by the string "&lt;", and any
        #    occurrences of the ">" character by the string "&gt;".
        return str_replace(['<', '>'], ['&lt;', '&gt;'], $escaped);
    }

    /**
     * Rewrites a name so it only contains characters accepted by the patterns
     * below, substituting each rejected character with "U" followed by six
     * uppercase hexadecimal digits.
     *
     * @param string $name The name to coerce
     * @return string The coerced name
     */
    protected function coerceName(string $name): string {
        // This matches the inverse of the production of NameChar in XML 1.0,
        // with the added exclusion of ":" from allowed characters
        // See https://www.w3.org/TR/REC-xml/#NT-NameStartChar
        $notNameChar = '/[^\-\.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u';
        preg_match_all($notNameChar, $name, $found);

        // Each distinct offending character is substituted everywhere at once
        $offenders = array_unique($found[0], \SORT_STRING);
        foreach ($offenders as $char) {
            // presumably nextCode() yields the character's code point; confirm against MensBeam\Intl
            $codePoint = (new UTF8($char))->nextCode();
            $hex = strtoupper(dechex($codePoint));
            $name = str_replace($char, "U".str_pad($hex, 6, "0", \STR_PAD_LEFT), $name);
        }

        // Apply stricter rules to the first character
        $notNameStartChar = '/^[^A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u';
        if (preg_match($notNameStartChar, $name, $lead) === 1) {
            $first = $lead[0];
            $codePoint = (new UTF8($first))->nextCode();
            $hex = strtoupper(dechex($codePoint));
            // strlen() is byte-based on purpose: it strips exactly the bytes of the matched character
            $rest = substr($name, strlen($first));
            $name = "U".str_pad($hex, 6, "0", \STR_PAD_LEFT).$rest;
        }

        return $name;
    }

    /**
     * Reverses coerceName() by replacing every "U" + six uppercase hexadecimal
     * digits sequence with the result of UTF8::encode() for that number.
     *
     * @param string $name The coerced name
     * @return string The name with escape sequences expanded
     */
    protected function uncoerceName(string $name): string {
        preg_match_all('/U[0-9A-F]{6}/', $name, $found);

        $sequences = array_unique($found[0], \SORT_STRING);
        foreach ($sequences as $sequence) {
            // Drop the leading "U" and read the remaining digits as hexadecimal
            $codePoint = hexdec(substr($sequence, 1));
            $name = str_replace($sequence, UTF8::encode($codePoint), $name);
        }

        return $name;
    }
}

1
lib/traits/Moonwalk.php

@ -8,6 +8,7 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
trait Moonwalk {
/**
* Generator which walks backwards through the DOM from the node the method is

1
lib/traits/Node.php

@ -8,6 +8,7 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
// Extensions to PHP's DOM cannot inherit from an extended Node parent, so a
// trait is the next best thing...
trait Node {

1
lib/traits/ParentNode.php

@ -8,6 +8,7 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
# 4.2.6. Mixin ParentNode
trait ParentNode {
use Node, ParentNodePolyfill;

1
lib/traits/ToString.php

@ -8,6 +8,7 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
trait ToString {
public function __toString(): string {
$frag = $this->ownerDocument->createDocumentFragment();

1
lib/traits/Walk.php

@ -8,6 +8,7 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
trait Walk {
/**
* Generator which walks down the DOM from the node the method is being run on.

47
tests/cases/Document/TestDocument.php

@ -11,10 +11,10 @@ use MensBeam\HTML\DOM\{
DOMException,
Element,
ElementMap,
Exception,
HTMLTemplateElement
};
use MensBeam\HTML\Parser;
use MensBeam\HTML\Parser,
org\bovigo\vfs\vfsStream;
/** @covers \MensBeam\HTML\DOM\Document */
@ -63,26 +63,27 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
}
/**
 * Data provider for testDisabledMethods().
 *
 * Each dataset is a [method name, argument] pair; the argument is null for
 * every method except loadXML.
 *
 * @return iterable<int, array{0: string, 1: ?string}>
 */
public function provideDisabledMethods(): iterable {
    // loadXML is the only entry supplied with a non-null argument
    $datasets = [
        [ 'loadXML', 'ook' ],
    ];

    // The remaining methods are paired with null
    foreach ([ 'saveXML', 'validate', 'xinclude' ] as $methodName) {
        $datasets[] = [ $methodName, null ];
    }

    return $datasets;
}
/**
* @dataProvider provideDisabledMethods
* @covers \MensBeam\HTML\DOM\Document::loadXML
* @covers \MensBeam\HTML\DOM\Document::saveXML
* @covers \MensBeam\HTML\DOM\Document::validate
* @covers \MensBeam\HTML\DOM\Document::xinclude
*/
public function testDisabledMethods() {
public function testDisabledMethods(string $methodName, ?string $argument): void {
$this->expectException(DOMException::class);
$this->expectExceptionCode(DOMException::NOT_SUPPORTED);
$d = new Document();
$d->loadXML('ook');
$d = new Document();
$d->saveXML('ook');
$d = new Document();
$d->validate();
$d = new Document();
$d->xinclude('ook');
$d->$methodName($argument);
}
@ -90,6 +91,8 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
* @covers \MensBeam\HTML\DOM\Document::__construct
* @covers \MensBeam\HTML\DOM\Document::loadDOM
* @covers \MensBeam\HTML\DOM\Document::loadHTML
* @covers \MensBeam\HTML\DOM\Document::preInsertionValidity
* @covers \MensBeam\HTML\DOM\Document::__get_quirksMode
*/
public function testDocumentCreation(): void {
// Test null source
@ -159,7 +162,7 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
* @covers \MensBeam\HTML\DOM\Document::validateAndExtract
*/
public function testElementCreationNS(?string $nsIn, ?string $nsOut, string $localIn, string $localOut, string $class): void {
$d = new Document;
$d = new Document();
$n = $d->createElementNS($nsIn, $localIn);
$this->assertInstanceOf($class, $n);
$this->assertNotNull($n->ownerDocument);
@ -168,6 +171,20 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
}
/**
* @covers \MensBeam\HTML\DOM\Document::save
* @covers \MensBeam\HTML\DOM\Document::saveHTMLFile
*/
public function testFileSaving(): void {
    // Write into a virtual filesystem instead of the real disk
    $root = vfsStream::setup('DOM', 0777);
    $target = $root->url() . '/test.html';

    // Build a document whose only node is an <html> element
    $document = new Document();
    $htmlElement = $document->createElement('html');
    $document->appendChild($htmlElement);

    $document->save($target);

    // The saved file must contain exactly the serialized element
    $written = file_get_contents($target);
    $this->assertSame('<html></html>', $written);
}
/** @covers \MensBeam\HTML\DOM\Document::importNode */
public function testImportingNodes() {
$d = new Document();
@ -235,4 +252,4 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
$d->body->removeChild($t);
$this->assertFalse(ElementMap::has($t));
}
}
}
Loading…
Cancel
Save