Browse Source

Correct unknown DOCTYPE checking

split-manual
J. King 3 years ago
parent
commit
6798c128e4
  1. 10
      lib/ParseError.php
  2. 2
      lib/Token.php
  3. 6
      lib/TreeBuilder.php

10
lib/ParseError.php

@@ -60,16 +60,18 @@ class ParseError {
 const EXPECTED_DOCTYPE_BUT_GOT_END_TAG = 201;
 const EXPECTED_DOCTYPE_BUT_GOT_CHARS = 202;
 const EXPECTED_DOCTYPE_BUT_GOT_EOF = 203;
-const UNEXPECTED_DOCTYPE = 204;
-const UNEXPECTED_START_TAG = 205;
-const UNEXPECTED_END_TAG = 206; // html5lib also uses 'adoption-agency-1.2' and 'adoption-agency-1.3' for this
-const NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS = 207;
+const UNKNOWN_DOCTYPE = 204;
+const UNEXPECTED_DOCTYPE = 205;
+const UNEXPECTED_START_TAG = 206;
+const UNEXPECTED_END_TAG = 207; // html5lib also uses 'adoption-agency-1.2' and 'adoption-agency-1.3' for this
+const NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS = 208;
 const MESSAGES = [
 self::EXPECTED_DOCTYPE_BUT_GOT_START_TAG => 'Expected DOCTYPE but got start tag',
 self::EXPECTED_DOCTYPE_BUT_GOT_END_TAG => 'Expected DOCTYPE but got end tag',
 self::EXPECTED_DOCTYPE_BUT_GOT_CHARS => 'Expected DOCTYPE but got characters',
 self::EXPECTED_DOCTYPE_BUT_GOT_EOF => 'Expected DOCTYPE but got end-of-file',
+self::UNKNOWN_DOCTYPE => 'Unknown DOCTYPE',
 self::UNEXPECTED_START_TAG => 'Unexpected start tag',
 self::UNEXPECTED_END_TAG => 'Unexpected end tag',
 self::NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS => 'Trailing solidus in non-void HTML element start tag',

2
lib/Token.php

@@ -27,7 +27,7 @@ class DOCTYPEToken extends Token {
 public $public;
 public $system;
-public function __construct(string $name = null, string $public = null, string $system = null) {
+public function __construct(?string $name = null, ?string $public = null, ?string $system = null) {
 // null stands in for the distinct "missing" state
 $this->name = $name;
 $this->public = $public;

6
lib/TreeBuilder.php

@@ -240,8 +240,8 @@ class TreeBuilder {
 # If the DOCTYPE token's name is not "html", or the token's public identifier is
 # not missing, or the token's system identifier is neither missing nor
 # "about:legacy-compat", then there is a parse error.
-if ($token->name !== 'html' || $token->public !== '' || ($token->system !== '' && $token->system !== 'about:legacy-compat')) {
-$this->error(ParseError::INVALID_DOCTYPE);
+if ($token->name !== 'html' || $token->public !== null || !($token->system === null || $token->system === 'about:legacy-compat')) {
+$this->error(ParseError::UNKNOWN_DOCTYPE);
 }
 # Append a DocumentType node to the Document node, with the name attribute set
@@ -1926,7 +1926,7 @@ class TreeBuilder {
 }
 }
-public static function insertCommentToken(CommentToken $token, \DOMNode $position = null) {
+public function insertCommentToken(CommentToken $token, \DOMNode $position = null) {
 # When the steps below require the user agent to insert a comment while
 # processing a comment token, optionally with an explicitly insertion position
 # position, the user agent must run the following steps:

Loading…
Cancel
Save