Browse Source

Added additional entry point

• Added an option to use Document::loadHTML or Document::load to parse a document.
• Made the DOM elements use the dW\HTML5 namespace instead of dW\HTML5\DOM.
• Fixed a bug where TreeBuilder wasn't being properly destructed when the parser finished.
split-manual
Dustin Wilson 6 years ago
parent
commit
fd6003fb4e
  1. 12
      README.md
  2. 5
      composer.json
  3. 2
      lib/DOM/Comment.php
  4. 24
      lib/DOM/Document.php
  5. 7
      lib/DOM/DocumentFragment.php
  6. 2
      lib/DOM/Element.php
  7. 4
      lib/DOM/Node.php
  8. 4
      lib/DOM/Printer.php
  9. 7
      lib/DOM/ProcessingInstruction.php
  10. 2
      lib/DOM/Text.php
  11. 6
      lib/Exception.php
  12. 15
      lib/Parser.php
  13. 2
      lib/TreeBuilder.php

12
README.md

@ -6,4 +6,14 @@ Tools for parsing and printing HTML5 documents and fragments.
<?php <?php
$dom = dW\HTML5\Parser::parse('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>'); $dom = dW\HTML5\Parser::parse('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?> ?>
``` ```
or:
```php
<?php
$dom = new dW\HTML5\Document;
$dom->loadHTML('<!DOCTYPE html><html lang="en" charset="utf-8"><head><title>Ook!</title></head><body><h1>Ook!</h1><p>Ook-ook? Oooook. Ook ook oook ook oooooook ook ooook ook.</p><p>Eek!</p></body></html>');
?>
```

5
composer.json

@ -18,7 +18,10 @@
], ],
"autoload": { "autoload": {
"psr-4": { "psr-4": {
"dW\\HTML5\\": "lib/" "dW\\HTML5\\": [
"lib/",
"lib/DOM"
]
}, },
"classmap": ["lib/Token.php"] "classmap": ["lib/Token.php"]
}, },

2
lib/DOM/Comment.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
class Comment extends \DOMComment { class Comment extends \DOMComment {
use Node; use Node;

24
lib/DOM/Document.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
class Document extends \DOMDocument { class Document extends \DOMDocument {
use Printer; use Printer;
@ -8,9 +8,11 @@ class Document extends \DOMDocument {
public function __construct() { public function __construct() {
parent::__construct(); parent::__construct();
$this->registerNodeClass('DOMComment', '\dW\HTML5\DOM\Comment'); $this->registerNodeClass('DOMComment', '\dW\HTML5\Comment');
$this->registerNodeClass('DOMElement', '\dW\HTML5\DOM\Element'); $this->registerNodeClass('DOMDocumentFragment', '\dW\HTML5\DocumentFragment');
$this->registerNodeClass('DOMText', '\dW\HTML5\DOM\Text'); $this->registerNodeClass('DOMElement', '\dW\HTML5\Element');
$this->registerNodeClass('DOMProcessingInstruction', '\dW\HTML5\ProcessingInstruction');
$this->registerNodeClass('DOMText', '\dW\HTML5\Text');
} }
public function fixIdAttributes() { public function fixIdAttributes() {
@ -44,7 +46,17 @@ class Document extends \DOMDocument {
$this->normalize(); $this->normalize();
} }
public function load($source, $options = null) {} public function load($source, $options = null): bool {
public function loadHTML($source, $options = null) {} Parser::parse((string)$source, $this, true);
return true;
}
public function loadHTML($source, $options = null): bool {
Parser::parse((string)$source, $this);
return true;
}
public function loadXML($source, $options = null) {} public function loadXML($source, $options = null) {}
} }

7
lib/DOM/DocumentFragment.php

@ -0,0 +1,7 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
class DocumentFragment extends \DOMDocumentFragment {
use Node;
}

2
lib/DOM/Element.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
class Element extends \DOMElement { class Element extends \DOMElement {
use Node; use Node;

4
lib/DOM/Node.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
trait Node { trait Node {
public function getAncestor($needle): Element { public function getAncestor($needle): Element {
@ -37,7 +37,7 @@ trait Node {
return $context; return $context;
} }
} else { } else {
throw new \dW\HTML5\Exception(\dW\HTML5\Exception::DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED, gettype($needle)); throw new Exception(Exception::DOM_DOMNODE_STRING_OR_CLOSURE_EXPECTED, gettype($needle));
} }
return null; return null;

4
lib/DOM/Printer.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
trait Printer { trait Printer {
protected $selfClosingElements = ['area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']; protected $selfClosingElements = ['area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
@ -11,7 +11,7 @@ trait Printer {
} }
if (!$node instanceof \DOMElement && !$node instanceof \DOMDocument && !$node instanceof \DOMDocumentFragment) { if (!$node instanceof \DOMElement && !$node instanceof \DOMDocument && !$node instanceof \DOMDocumentFragment) {
throw new \dW\HTML5\Exception(\dW\HTML5\Exception::DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node)); throw new Exception(Exception::DOM_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
} }
# 1. Let s be a string, and initialize it to the empty string. # 1. Let s be a string, and initialize it to the empty string.

7
lib/DOM/ProcessingInstruction.php

@ -0,0 +1,7 @@
<?php
declare(strict_types=1);
namespace dW\HTML5;
class ProcessingInstruction extends \DOMProcessingInstruction {
use Node;
}

2
lib/DOM/Text.php

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace dW\HTML5\DOM; namespace dW\HTML5;
class Text extends \DOMText { class Text extends \DOMText {
use Node; use Node;

6
lib/Exception.php

@ -21,6 +21,8 @@ class Exception extends \Exception {
const TREEBUILDER_FORMELEMENT_EXPECTED = 10501; const TREEBUILDER_FORMELEMENT_EXPECTED = 10501;
const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10502; const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10502;
const PARSER_NONEMPTY_DOCUMENT = 10601;
protected static $messages = [10000 => 'Invalid error code', protected static $messages = [10000 => 'Invalid error code',
10001 => 'Unknown error; escaping', 10001 => 'Unknown error; escaping',
10002 => 'Incorrect number of parameters for Exception message; %s expected', 10002 => 'Incorrect number of parameters for Exception message; %s expected',
@ -37,7 +39,9 @@ class Exception extends \Exception {
10401 => 'The Tokenizer has entered an invalid state', 10401 => 'The Tokenizer has entered an invalid state',
10501 => 'Form element expected, found %s', 10501 => 'Form element expected, found %s',
10502 => 'Element, Document, or DOMDocumentFragment expected; found %s']; 10502 => 'Element, Document, or DOMDocumentFragment expected; found %s',
10601 => 'Non-empty Document supplied as argument for Parser'];
public function __construct(int $code, ...$args) { public function __construct(int $code, ...$args) {
if (!isset(static::$messages[$code])) { if (!isset(static::$messages[$code])) {

15
lib/Parser.php

@ -53,10 +53,11 @@ class Parser {
} }
public function __destruct() { public function __destruct() {
$this->treeBuilder->__destruct();
static::$instance = null; static::$instance = null;
} }
public static function parse(string $data, bool $file = false) { public static function parse(string $data, Document $document = null, bool $file = false) {
// If parse() is called by parseFragment() then don't create an instance. It has // If parse() is called by parseFragment() then don't create an instance. It has
// already been created. // already been created.
$c = __CLASS__; $c = __CLASS__;
@ -64,8 +65,14 @@ class Parser {
static::$instance = new $c; static::$instance = new $c;
} }
if (is_null(static::$instance->DOM)) { if (is_null($document)) {
static::$instance->DOM = new DOM\Document(); static::$instance->DOM = new Document();
} else {
if ($document->hasChildNodes()) {
throw new Exception(Exception::PARSER_NONEMPTY_DOCUMENT);
}
static::$instance->DOM = $document;
} }
// Process the input stream. // Process the input stream.
@ -109,7 +116,7 @@ class Parser {
$c = __CLASS__; $c = __CLASS__;
static::$instance = new $c; static::$instance = new $c;
static::$instance->DOM = (is_null($context)) ? new DOM\Document() : $context->ownerDocument; static::$instance->DOM = (is_null($context)) ? new Document() : $context->ownerDocument;
static::$instance->DOMFragment = static::$instance->DOM->createDocumentFragment(); static::$instance->DOMFragment = static::$instance->DOM->createDocumentFragment();
// DEVIATION: The spec says to let the document be in quirks mode if the // DEVIATION: The spec says to let the document be in quirks mode if the

2
lib/TreeBuilder.php

@ -76,7 +76,7 @@ class TreeBuilder {
const QUIRKS_MODE_LIMITED = 2; const QUIRKS_MODE_LIMITED = 2;
public function __construct(DOM\Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer) { public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer) {
// If the form element isn't an instance of DOMElement that has a node name of // If the form element isn't an instance of DOMElement that has a node name of
// "form" or null then there's a problem. // "form" or null then there's a problem.
if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) { if (!is_null($formElement) && !($formElement instanceof DOMElement && $formElement->nodeName === 'form')) {

Loading…
Cancel
Save