|
|
@ -15,8 +15,6 @@ class Tokenizer { |
|
|
|
protected $stack; |
|
|
|
protected $temporaryBuffer = ""; |
|
|
|
|
|
|
|
public static $debug = false; |
|
|
|
|
|
|
|
const DATA_STATE = 1; |
|
|
|
const RCDATA_STATE = 2; |
|
|
|
const RAWTEXT_STATE = 3; |
|
|
@ -223,7 +221,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
|
|
|
|
protected function keepOrDiscardAttribute(TagToken $token, TokenAttr $attribute): void { |
|
|
|
// See 12.2.5.33 Attribute name state |
|
|
|
// See 13.2.5.33 Attribute name state |
|
|
|
|
|
|
|
# When the user agent leaves the attribute name state |
|
|
|
# (and before emitting the tag token, if appropriate), |
|
|
@ -259,7 +257,7 @@ class Tokenizer { |
|
|
|
return true; |
|
|
|
})()); |
|
|
|
|
|
|
|
# 12.2.5.1 Data state |
|
|
|
# 13.2.5.1 Data state |
|
|
|
if ($this->state === self::DATA_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -301,7 +299,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.2 RCDATA state |
|
|
|
# 13.2.5.2 RCDATA state |
|
|
|
elseif ($this->state === self::RCDATA_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -343,7 +341,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.3 RAWTEXT state |
|
|
|
# 13.2.5.3 RAWTEXT state |
|
|
|
elseif ($this->state === self::RAWTEXT_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -377,7 +375,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.4 Script data state |
|
|
|
# 13.2.5.4 Script data state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -411,7 +409,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.5 PLAINTEXT state |
|
|
|
# 13.2.5.5 PLAINTEXT state |
|
|
|
elseif ($this->state === self::PLAINTEXT_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -440,7 +438,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.6 Tag open state |
|
|
|
# 13.2.5.6 Tag open state |
|
|
|
elseif ($this->state === self::TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -500,7 +498,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.7 End tag open state |
|
|
|
# 13.2.5.7 End tag open state |
|
|
|
elseif ($this->state === self::END_TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -547,7 +545,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.8 Tag name state |
|
|
|
# 13.2.5.8 Tag name state |
|
|
|
elseif ($this->state === self::TAG_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -609,7 +607,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.9 RCDATA less-than sign state |
|
|
|
# 13.2.5.9 RCDATA less-than sign state |
|
|
|
elseif ($this->state === self::RCDATA_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -631,7 +629,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.10 RCDATA end tag open state |
|
|
|
# 13.2.5.10 RCDATA end tag open state |
|
|
|
elseif ($this->state === self::RCDATA_END_TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -655,7 +653,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.11 RCDATA end tag name state |
|
|
|
# 13.2.5.11 RCDATA end tag name state |
|
|
|
elseif ($this->state === self::RCDATA_END_TAG_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -732,7 +730,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.12 RAWTEXT less-than sign state |
|
|
|
# 13.2.5.12 RAWTEXT less-than sign state |
|
|
|
elseif ($this->state === self::RAWTEXT_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -754,7 +752,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.13 RAWTEXT end tag open state |
|
|
|
# 13.2.5.13 RAWTEXT end tag open state |
|
|
|
elseif ($this->state === self::RAWTEXT_END_TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -777,7 +775,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.14 RAWTEXT end tag name state |
|
|
|
# 13.2.5.14 RAWTEXT end tag name state |
|
|
|
elseif ($this->state === self::RAWTEXT_END_TAG_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -855,7 +853,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.15 Script data less-than sign state |
|
|
|
# 13.2.5.15 Script data less-than sign state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -885,7 +883,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.16 Script data end tag open state |
|
|
|
# 13.2.5.16 Script data end tag open state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_END_TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -908,7 +906,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.17 Script data end tag name state |
|
|
|
# 13.2.5.17 Script data end tag name state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_END_TAG_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -985,7 +983,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.18 Script data escape start state |
|
|
|
# 13.2.5.18 Script data escape start state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPE_START_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1006,7 +1004,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.19 Script data escape start dash state |
|
|
|
# 13.2.5.19 Script data escape start dash state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPE_START_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1026,7 +1024,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.20 Script data escaped state |
|
|
|
# 13.2.5.20 Script data escaped state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1068,7 +1066,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.21 Script data escaped dash state |
|
|
|
# 13.2.5.21 Script data escaped dash state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1110,7 +1108,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.22 Script data escaped dash dash state |
|
|
|
# 13.2.5.22 Script data escaped dash dash state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_DASH_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1157,7 +1155,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.23 Script data escaped less-than sign state |
|
|
|
# 13.2.5.23 Script data escaped less-than sign state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1190,7 +1188,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.24 Script data escaped end tag open state |
|
|
|
# 13.2.5.24 Script data escaped end tag open state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1217,7 +1215,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.25 Script data escaped end tag name state |
|
|
|
# 13.2.5.25 Script data escaped end tag name state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1294,7 +1292,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.26 Script data double escape start state |
|
|
|
# 13.2.5.26 Script data double escape start state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1340,7 +1338,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.27 Script data double escaped state |
|
|
|
# 13.2.5.27 Script data double escaped state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1384,7 +1382,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.28 Script data double escaped dash state |
|
|
|
# 13.2.5.28 Script data double escaped dash state |
|
|
|
elseif ($this->state == self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1428,7 +1426,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.29 Script data double escaped dash dash state |
|
|
|
# 13.2.5.29 Script data double escaped dash dash state |
|
|
|
elseif ($this->state == self::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1477,7 +1475,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.30 Script data double escaped less-than sign state |
|
|
|
# 13.2.5.30 Script data double escaped less-than sign state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1499,7 +1497,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.31 Script data double escape end state |
|
|
|
# 13.2.5.31 Script data double escape end state |
|
|
|
elseif ($this->state === self::SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1548,7 +1546,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.32 Before attribute name state |
|
|
|
# 13.2.5.32 Before attribute name state |
|
|
|
elseif ($this->state === self::BEFORE_ATTRIBUTE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1590,7 +1588,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.33 Attribute name state |
|
|
|
# 13.2.5.33 Attribute name state |
|
|
|
elseif ($this->state === self::ATTRIBUTE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1649,7 +1647,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.34 After attribute name state |
|
|
|
# 13.2.5.34 After attribute name state |
|
|
|
elseif ($this->state === self::AFTER_ATTRIBUTE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1697,7 +1695,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.35 Before attribute value state |
|
|
|
# 13.2.5.35 Before attribute value state |
|
|
|
elseif ($this->state === self::BEFORE_ATTRIBUTE_VALUE_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1737,7 +1735,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.36 Attribute value (double-quoted) state |
|
|
|
# 13.2.5.36 Attribute value (double-quoted) state |
|
|
|
elseif ($this->state === self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1780,7 +1778,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.37 Attribute value (single-quoted) state |
|
|
|
# 13.2.5.37 Attribute value (single-quoted) state |
|
|
|
elseif ($this->state === self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1824,7 +1822,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
# 12.2.5.38 Attribute value (unquoted) state |
|
|
|
# 13.2.5.38 Attribute value (unquoted) state |
|
|
|
elseif ($this->state === self::ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1888,7 +1886,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.39 After attribute value (quoted) state |
|
|
|
# 13.2.5.39 After attribute value (quoted) state |
|
|
|
elseif ($this->state === self::AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1931,7 +1929,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.40 Self-closing start tag state |
|
|
|
# 13.2.5.40 Self-closing start tag state |
|
|
|
elseif ($this->state === self::SELF_CLOSING_START_TAG_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -1963,7 +1961,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.44 Bogus comment state |
|
|
|
# 13.2.5.41 Bogus comment state |
|
|
|
elseif ($this->state === self::BOGUS_COMMENT_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2005,7 +2003,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.42 Markup declaration open state |
|
|
|
# 13.2.5.42 Markup declaration open state |
|
|
|
elseif ($this->state === self::MARKUP_DECLARATION_OPEN_STATE) { |
|
|
|
# If the next few characters are: |
|
|
|
|
|
|
@ -2058,7 +2056,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.43 Comment start state |
|
|
|
# 13.2.5.43 Comment start state |
|
|
|
elseif ($this->state === self::COMMENT_START_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2085,7 +2083,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.44 Comment start dash state |
|
|
|
# 13.2.5.44 Comment start dash state |
|
|
|
elseif ($this->state === self::COMMENT_START_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2127,7 +2125,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.45 Comment state |
|
|
|
# 13.2.5.45 Comment state |
|
|
|
elseif ($this->state === self::COMMENT_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2175,7 +2173,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.46 Comment less-than sign state |
|
|
|
# 13.2.5.46 Comment less-than sign state |
|
|
|
elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2200,7 +2198,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.47 Comment less-than sign bang state |
|
|
|
# 13.2.5.47 Comment less-than sign bang state |
|
|
|
elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2218,7 +2216,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.48 Comment less-than sign bang dash state |
|
|
|
# 13.2.5.48 Comment less-than sign bang dash state |
|
|
|
elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2236,7 +2234,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.49 Comment less-than sign bang dash dash state |
|
|
|
# 13.2.5.49 Comment less-than sign bang dash dash state |
|
|
|
elseif ($this->state === self::COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2258,7 +2256,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.50 Comment end dash state |
|
|
|
# 13.2.5.50 Comment end dash state |
|
|
|
elseif ($this->state === self::COMMENT_END_DASH_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2291,7 +2289,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.50 Comment end state |
|
|
|
# 13.2.5.51 Comment end state |
|
|
|
elseif ($this->state === self::COMMENT_END_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2340,7 +2338,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.52 Comment end bang state |
|
|
|
# 13.2.5.52 Comment end bang state |
|
|
|
elseif ($this->state === self::COMMENT_END_BANG_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2388,7 +2386,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.53 DOCTYPE state |
|
|
|
# 13.2.5.53 DOCTYPE state |
|
|
|
elseif ($this->state === self::DOCTYPE_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2434,7 +2432,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.54 Before DOCTYPE name state |
|
|
|
# 13.2.5.54 Before DOCTYPE name state |
|
|
|
elseif ($this->state === self::BEFORE_DOCTYPE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2501,7 +2499,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.55 DOCTYPE name state |
|
|
|
# 13.2.5.55 DOCTYPE name state |
|
|
|
elseif ($this->state === self::DOCTYPE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2558,7 +2556,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.56 After DOCTYPE name state |
|
|
|
# 13.2.5.56 After DOCTYPE name state |
|
|
|
elseif ($this->state === self::AFTER_DOCTYPE_NAME_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2626,7 +2624,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.57 After DOCTYPE public keyword state |
|
|
|
# 13.2.5.57 After DOCTYPE public keyword state |
|
|
|
elseif ($this->state === self::AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2695,7 +2693,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.58 Before DOCTYPE public identifier state |
|
|
|
# 13.2.5.58 Before DOCTYPE public identifier state |
|
|
|
elseif ($this->state === self::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2759,7 +2757,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.59 DOCTYPE public identifier (double-quoted) state |
|
|
|
# 13.2.5.59 DOCTYPE public identifier (double-quoted) state |
|
|
|
elseif ($this->state === self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2815,7 +2813,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.60 DOCTYPE public identifier (single-quoted) state |
|
|
|
# 13.2.5.60 DOCTYPE public identifier (single-quoted) state |
|
|
|
elseif ($this->state === self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2871,7 +2869,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.60 After DOCTYPE public identifier state |
|
|
|
# 13.2.5.61 After DOCTYPE public identifier state |
|
|
|
elseif ($this->state === self::AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2936,7 +2934,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.62 Between DOCTYPE public and system identifiers state |
|
|
|
# 13.2.5.62 Between DOCTYPE public and system identifiers state |
|
|
|
elseif ($this->state === self::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -2998,7 +2996,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.63 After DOCTYPE system keyword state |
|
|
|
# 13.2.5.63 After DOCTYPE system keyword state |
|
|
|
elseif ($this->state === self::AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3067,7 +3065,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.64 Before DOCTYPE system identifier state |
|
|
|
# 13.2.5.64 Before DOCTYPE system identifier state |
|
|
|
elseif ($this->state === self::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3133,7 +3131,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.64 DOCTYPE system identifier (double-quoted) state |
|
|
|
# 13.2.5.65 DOCTYPE system identifier (double-quoted) state |
|
|
|
elseif ($this->state === self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3188,7 +3186,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.66 DOCTYPE system identifier (single-quoted) state |
|
|
|
# 13.2.5.66 DOCTYPE system identifier (single-quoted) state |
|
|
|
elseif ($this->state === self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3243,7 +3241,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.67 After DOCTYPE system identifier state |
|
|
|
# 13.2.5.67 After DOCTYPE system identifier state |
|
|
|
elseif ($this->state === self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3288,7 +3286,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.67 Bogus DOCTYPE state |
|
|
|
# 13.2.5.68 Bogus DOCTYPE state |
|
|
|
elseif ($this->state === self::BOGUS_DOCTYPE_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3322,7 +3320,7 @@ class Tokenizer { |
|
|
|
# Ignore the character. |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.69 CDATA section state |
|
|
|
# 13.2.5.69 CDATA section state |
|
|
|
elseif ($this->state === self::CDATA_SECTION_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3350,7 +3348,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.70 CDATA section bracket state |
|
|
|
# 13.2.5.70 CDATA section bracket state |
|
|
|
elseif ($this->state === self::CDATA_SECTION_BRACKET_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3370,7 +3368,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.71 CDATA section end state |
|
|
|
# 13.2.5.71 CDATA section end state |
|
|
|
elseif ($this->state === self::CDATA_SECTION_END_STATE) { |
|
|
|
# Consume the next input character |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3417,7 +3415,7 @@ class Tokenizer { |
|
|
|
return true; |
|
|
|
})()); |
|
|
|
|
|
|
|
# 12.2.5.72 Character reference state |
|
|
|
# 13.2.5.72 Character reference state |
|
|
|
if ($this->state === self::CHARACTER_REFERENCE_STATE) { |
|
|
|
# Set the temporary buffer to the empty string. |
|
|
|
# Append a U+0026 AMPERSAND (&) character to the temporary buffer. |
|
|
@ -3448,7 +3446,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.73 Named character reference state |
|
|
|
# 13.2.5.73 Named character reference state |
|
|
|
elseif ($this->state === self::NAMED_CHARACTER_REFERENCE_STATE) { |
|
|
|
# Consume the maximum number of characters possible, |
|
|
|
# with the consumed characters matching one of the |
|
|
@ -3528,7 +3526,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.74 Ambiguous ampersand state |
|
|
|
# 13.2.5.74 Ambiguous ampersand state |
|
|
|
elseif ($this->state === self::AMBIGUOUS_AMPERSAND_STATE) { |
|
|
|
# Consume the next input character. |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3560,7 +3558,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.75 Numeric character reference state |
|
|
|
# 13.2.5.75 Numeric character reference state |
|
|
|
elseif ($this->state === self::NUMERIC_CHARACTER_REFERENCE_STATE) { |
|
|
|
# Set the character reference code to zero (0). |
|
|
|
$charRefCode = 0; |
|
|
@ -3583,7 +3581,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.76 Hexadecimal character reference start state |
|
|
|
# 13.2.5.76 Hexadecimal character reference start state |
|
|
|
elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_START_STATE) { |
|
|
|
# Consume the next input character. |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3609,7 +3607,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.77 Decimal character reference start state |
|
|
|
# 13.2.5.77 Decimal character reference start state |
|
|
|
elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_START_STATE) { |
|
|
|
# Consume the next input character. |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3635,7 +3633,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.78 Hexadecimal character reference state |
|
|
|
# 13.2.5.78 Hexadecimal character reference state |
|
|
|
elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_STATE) { |
|
|
|
# Consume the next input character. |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3667,7 +3665,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.79 Decimal character reference state |
|
|
|
# 13.2.5.79 Decimal character reference state |
|
|
|
elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_STATE) { |
|
|
|
# Consume the next input character. |
|
|
|
$char = $this->data->consume(); |
|
|
@ -3697,7 +3695,7 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# 12.2.5.80 Numeric character reference end state |
|
|
|
# 13.2.5.80 Numeric character reference end state |
|
|
|
elseif ($this->state === self::NUMERIC_CHARACTER_REFERENCE_END_STATE) { |
|
|
|
# Check the character reference code: |
|
|
|
|
|
|
|