Browse Source

Added additional entry point

• Added an option to use Document::loadHTML or Document::load to parse a document.
• Made the DOM elements use dW\HTML5 namespace instead of dW\HTML5\DOM.
• Fixed an issue where TreeBuilder wasn't being properly destructed when the parser finished.
ns
Dustin Wilson 6 years ago
parent
commit
fd6003fb4e
  1. 12
      README.md
  2. 5
      composer.json
  3. 2
      lib/DOM/Comment.php
  4. 24
      lib/DOM/Document.php
  5. 7
      lib/DOM/DocumentFragment.php
  6. 2
      lib/DOM/Element.php
  7. 4
      lib/DOM/Node.php
  8. 4
      lib/DOM/Printer.php
  9. 7
      lib/DOM/ProcessingInstruction.php
  10. 2
      lib/DOM/Text.php
  11. 6
      lib/Exception.php
  12. 15
      lib/Parser.php
  13. 2
      lib/TreeBuilder.php

12
README.md

@ -6,4 +6,14 @@ Tools for parsing and printing HTML5 documents and fragments.
<?php
$dom = dW\HTML5\Parser::parse('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?>
```
```
or:
```php
<?php
$dom = new dW\HTML5\Document;
$dom->loadHTML('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?>
```

5
composer.json

@ -18,7 +18,10 @@
],
"autoload": {
"psr-4": {
"dW\\HTML5\\": "lib/"
"dW\\HTML5\\": [
"lib/",
"lib/DOM"
]
},
"classmap": ["lib/Token.php"]
},

2
lib/DOM/Comment.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
class Comment extends \DOMComment {
use Node;

24
lib/DOM/Document.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
class Document extends \DOMDocument {
use Printer;
@ -8,9 +8,11 @@ class Document extends \DOMDocument {
public function __construct() {
parent::__construct();
$this->registerNodeClass('DOMComment', '\dW\HTML5\DOM\Comment');
$this->registerNodeClass('DOMElement', '\dW\HTML5\DOM\Element');
$this->registerNodeClass('DOMText', '\dW\HTML5\DOM\Text');
$this->registerNodeClass('DOMComment', '\dW\HTML5\Comment');
$this->registerNodeClass('DOMDocumentFragment', '\dW\HTML5\DocumentFragment');
$this->registerNodeClass('DOMElement', '\dW\HTML5\Element');
$this->registerNodeClass('DOMProcessingInstruction', '\dW\HTML5\ProcessingInstruction');
$this->registerNodeClass('DOMText', '\dW\HTML5\Text');
}
public function fixIdAttributes() {
@ -44,7 +46,17 @@ class Document extends \DOMDocument {
$this->normalize();
}
public function load($source, $options = null) {}
public function loadHTML($source, $options = null) {}
public function load($source, $options = null): bool {
Parser::parse((string)$source, $this, true);
return true;
}
public function loadHTML($source, $options = null): bool {
Parser::parse((string)$source, $this);
return true;
}
public function loadXML($source, $options = null) {}
}

7
lib/DOM/DocumentFragment.php

@ -0,0 +1,7 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
class DocumentFragment extends \DOMDocumentFragment {
use Node;
}

2
lib/DOM/Element.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
class Element extends \DOMElement {
use Node;

4
lib/DOM/Node.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
trait Node {
public function getAncestor($needle): Element {
@ -37,7 +37,7 @@ trait Node {
return $context;
}
} else {
throw new \dW\HTML5\Exception(\dW\HTML5\Exception::DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED, gettype($needle));
throw new Exception(Exception::DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED, gettype($needle));
}
return null;

4
lib/DOM/Printer.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
trait Printer {
protected $selfClosingElements = ['area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
@ -11,7 +11,7 @@ trait Printer {
}
if (!$node instanceof \DOMElement && !$node instanceof \DOMDocument && !$node instanceof \DOMDocumentFragment) {
throw new \dW\HTML5\Exception(\dW\HTML5\Exception::DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
throw new Exception(Exception::DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
}
# 1. Let s be a string, and initialize it to the empty string.

7
lib/DOM/ProcessingInstruction.php

@ -0,0 +1,7 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
class ProcessingInstruction extends \DOMProcessingInstruction {
use Node;
}

2
lib/DOM/Text.php

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
namespace dW\HTML5\DOM;
namespace dW\HTML5;
class Text extends \DOMText {
use Node;

6
lib/Exception.php

@ -21,6 +21,8 @@ class Exception extends \Exception {
const TREEBUILDER_FORMELEMENT_EXPECTED = 10501;
const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10502;
const PARSER_NONEMPTY_DOCUMENT = 10601;
protected static $messages = [10000 => 'Invalid error code',
10001 => 'Unknown error; escaping',
10002 => 'Incorrect number of parameters for Exception message; %s expected',
@ -37,7 +39,9 @@ class Exception extends \Exception {
10401 => 'The Tokenizer has entered an invalid state',
10501 => 'Form element expected, found %s',
10502 => 'Element, Document, or DOMDocumentFragment expected; found %s'];
10502 => 'Element, Document, or DOMDocumentFragment expected; found %s',
10601 => 'Non-empty Document supplied as argument for Parser'];
public function __construct(int $code, ...$args) {
if (!isset(static::$messages[$code])) {

15
lib/Parser.php

@ -53,10 +53,11 @@ class Parser {
}
public function __destruct() {
$this->treeBuilder->__destruct();
static::$instance = null;
}
public static function parse(string $data, bool $file = false) {
public static function parse(string $data, Document $document = null, bool $file = false) {
// If parse() is called by parseFragment() then don't create an instance. It has
// already been created.
$c = __CLASS__;
@ -64,8 +65,14 @@ class Parser {
static::$instance = new $c;
}
if (is_null(static::$instance->DOM)) {
static::$instance->DOM = new DOM\Document();
if (is_null($document)) {
static::$instance->DOM = new Document();
} else {
if ($document->hasChildNodes()) {
throw new Exception(Exception::PARSER_NONEMPTY_DOCUMENT);
}
static::$instance->DOM = $document;
}
// Process the input stream.
@ -109,7 +116,7 @@ class Parser {
$c = __CLASS__;
static::$instance = new $c;
static::$instance->DOM = (is_null($context)) ? new DOM\Document() : $context->ownerDocument;
static::$instance->DOM = (is_null($context)) ? new Document() : $context->ownerDocument;
static::$instance->DOMFragment = static::$instance->DOM->createDocumentFragment();
// DEVIATION: The spec says to let the document be in quirks mode if the

2
lib/TreeBuilder.php

@ -76,7 +76,7 @@ class TreeBuilder {
const QUIRKS_MODE_LIMITED = 2;
public function __construct(DOM\Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer) {
public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer) {
// If the form element isn't an instance of DOMElement that has a node name of
// "form" or null then there's a problem.
if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) {

Loading…
Cancel
Save