From e8b3c76046d6cebcbf79d6591f5b0b0b6eeaf711 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Tue, 17 Dec 2019 13:47:53 -0500 Subject: [PATCH] Fix most failures Also removed assertions --- lib/Tokenizer.php | 163 +++++++--------------------------------------- 1 file changed, 22 insertions(+), 141 deletions(-) diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index fc0333d..515d810 100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -165,7 +165,7 @@ class Tokenizer { self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => "DOCTYPE system identifier (double-quoted)", self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => "DOCTYPE system identifier (single-quoted)", self::AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE => "After DOCTYPE system identifier", - self::BOGUS_DOCTYPE_STATE => "Bogus comment", + self::BOGUS_DOCTYPE_STATE => "Bogus DOCTYPE", self::CDATA_SECTION_STATE => "CDATA section", self::CHARACTER_REFERENCE_STATE => "Character reference", self::NAMED_CHARACTER_REFERENCE_STATE => "Named character reference", @@ -248,6 +248,8 @@ class Tokenizer { return true; })()); + $temporaryBuffer = ''; + while (true) { assert((function() { $state = self::STATE_NAMES[$this->state] ?? $this->state; @@ -566,7 +568,6 @@ class Tokenizer { elseif ($char === '>') { # Switch to the data state. Emit the current tag token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof TagToken); $this->sanitizeTag($token); return $token; } @@ -579,15 +580,15 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that are Uppercase ASCII characters to // prevent having to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_UPPER)); } # U+0000 NULL elseif ($char === "\0") { # This is an unexpected-null-character parse error. - # Emit a U+FFFD REPLACEMENT CHARACTER character token. + # Append a U+FFFD REPLACEMENT CHARACTER character to + # the current tag token's tag name. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - return new CharacterToken("\u{FFFD}"); + $token->name .= "\u{FFFD}"; } # EOF elseif ($char === '') { @@ -603,7 +604,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->name .= $char.$this->data->consumeUntil("\0\t\n\x0c />".self::CTYPE_UPPER); } } @@ -667,7 +667,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, then switch to the # before attribute name state. Otherwise, treat it as per the "anything else" # entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; } else { @@ -679,7 +678,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, then switch to the # self-closing start tag state. Otherwise, treat it as per the "anything else" # entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::SELF_CLOSING_START_TAG_STATE; } else { @@ -691,7 +689,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, then switch to the # data state and emit the current tag token. Otherwise, treat it as per the # "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::DATA_STATE; $this->sanitizeTag($token); @@ -716,9 +713,8 @@ class Tokenizer { // OPTIMIZATION: Combine upper and lower alpha // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); - assert(isset($temporaryBuffer)); - $token->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_ALPHA)); + $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); + $token->name .= strtolower($char); $temporaryBuffer .= $char; } # Anything else @@ -793,7 +789,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the before attribute name state. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; } else { @@ -806,7 +801,6 @@ class Tokenizer { # then switch to the self-closing start tag state. # Otherwise, treat it as per the "anything else" # entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::SELF_CLOSING_START_TAG_STATE; } else { @@ -818,7 +812,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the data state and emit the current tag token. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::DATA_STATE; $this->sanitizeTag($token); @@ -843,9 +836,8 @@ class Tokenizer { // OPTIMIZATION: Combine upper and lower alpha // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); - assert(isset($temporaryBuffer)); - $token->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_ALPHA)); + $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); + $token->name .= strtolower($char); $temporaryBuffer .= $char; } # Anything else @@ -928,7 +920,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the before attribute name state. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; } else { @@ -940,7 +931,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the self-closing start tag state. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::SELF_CLOSING_START_TAG_STATE; } else { @@ -952,7 +942,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the data state and emit the current tag token. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::DATA_STATE; $this->sanitizeTag($token); @@ -977,8 +966,6 @@ class Tokenizer { // OPTIMIZATION: Combine upper and lower alpha // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); - assert(isset($temporaryBuffer)); $token->name .= strtolower($char.strtolower($this->data->consumeWhile(self::CTYPE_ALPHA))); $temporaryBuffer .= $char; } @@ -1185,12 +1172,10 @@ class Tokenizer { # Emit a U+003C LESS-THAN SIGN character token. # Reconsume in the script data double escape start state. - // OPTIMIZATION: Avoid reconsuming - // Set the temporary buffer to the lowercase of the character - // Emit a less-than sign and the character without changing case - $temporaryBuffer = strtolower($char); + $temporaryBuffer = ''; $this->state = self::SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE; - return new CharacterToken('<'.$char); + $this->data->unconsume(); + return new CharacterToken('<'); } # Anything else else { @@ -1242,7 +1227,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the before attribute name state. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; } else { @@ -1254,7 +1238,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the self-closing start tag state. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::SELF_CLOSING_START_TAG_STATE; } else { @@ -1266,7 +1249,6 @@ class Tokenizer { # If the current end tag token is an appropriate end tag token, # then switch to the data state and emit the current tag token. # Otherwise, treat it as per the "anything else" entry below. - assert(isset($token) && $token instanceof Token); if ($token->name === $this->stack->currentNodeName) { $this->state = self::DATA_STATE; $this->sanitizeTag($token); @@ -1292,8 +1274,6 @@ class Tokenizer { // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. $char .= $this->data->consumeWhile(self::CTYPE_ALPHA); - assert(isset($token) && $token instanceof Token); - assert(isset($temporaryBuffer)); $token->name .= strtolower($char); $temporaryBuffer .= $char; } @@ -1346,7 +1326,6 @@ class Tokenizer { // Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. $char = $char.$this->data->consumeWhile(self::CTYPE_ALPHA); - assert(isset($temporaryBuffer)); $temporaryBuffer .= strtolower($char); return new CharacterToken($char); } @@ -1555,7 +1534,6 @@ class Tokenizer { // OPTIMIZATION: Consume all characters that are ASCII characters to prevent having // to loop back through here every single time. $char = $char.$this->data->consumeWhile(self::CTYPE_ALPHA); - assert(isset($temporaryBuffer)); $temporaryBuffer .= strtolower($char); return new CharacterToken($char); } @@ -1623,8 +1601,6 @@ class Tokenizer { # EOF if ($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ' || $char === '/' || $char === '>' || $char === '') { # Reconsume in the after attribute name state. - assert(isset($token) && $token instanceof Token); - assert(isset($attribute) && $attribute instanceof TokenAttr); $this->keepOrDiscardAttribute($token, $attribute); $this->data->unconsume(); $this->state = self::AFTER_ATTRIBUTE_NAME_STATE; @@ -1632,8 +1608,6 @@ class Tokenizer { # "=" (U+003D) elseif ($char === '=') { # Switch to the before attribute value state. - assert(isset($token) && $token instanceof Token); - assert(isset($attribute) && $attribute instanceof TokenAttr); $this->keepOrDiscardAttribute($token, $attribute); $this->state = self::BEFORE_ATTRIBUTE_VALUE_STATE; } @@ -1646,7 +1620,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that are uppercase ASCII letters to prevent // having to loop back through here every single time. - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->name .= strtolower($char.$this->data->consumeWhile(self::CTYPE_UPPER)); } # U+0000 NULL @@ -1654,7 +1627,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's name. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->name .= "\u{FFFD}"; } # U+0022 QUOTATION MARK (") @@ -1670,7 +1642,6 @@ class Tokenizer { else { attribute_name_state_anything_else: # Append the current input character to the current attribute's name. - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->name .= $char.$this->data->consumeUntil("\t\n\x0c /=>\0\"'<".self::CTYPE_UPPER); } } @@ -1702,7 +1673,6 @@ class Tokenizer { # Switch to the data state. # Emit the current tag token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); $this->sanitizeTag($token); return $token; } @@ -1753,7 +1723,6 @@ class Tokenizer { # Emit the current tag token. $this->error(ParseError::MISSING_ATTRIBUTE_VALUE); $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); $this->sanitizeTag($token); return $token; } @@ -1781,7 +1750,6 @@ class Tokenizer { # Switch to the character reference state. // DEVIATION: Character reference consumption implemented as a function - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE); } # U+0000 NULL @@ -1789,7 +1757,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= "\u{FFFD}"; } # EOF @@ -1806,7 +1773,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $char.$this->data->consumeUntil("\"&\0"); } } @@ -1827,7 +1793,6 @@ class Tokenizer { # Switch to the character reference state. // DEVIATION: Character reference consumption implemented as a function - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE); } # U+0000 NULL @@ -1835,7 +1800,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= "\u{FFFD}"; } # EOF @@ -1852,7 +1816,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $char.$this->data->consumeUntil("'&\0"); } } @@ -1869,7 +1832,7 @@ class Tokenizer { # U+0020 SPACE if ($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { # Switch to the before attribute name state. - $this->state = self::BEFORE_ATTRIBUTE_VALUE_STATE; + $this->state = self::BEFORE_ATTRIBUTE_NAME_STATE; } # U+0026 AMPERSAND (&) elseif ($char === '&') { @@ -1877,14 +1840,12 @@ class Tokenizer { # Switch to the character reference state. // DEVIATION: Character reference consumption implemented as a function - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $this->switchToCharacterReferenceState(self::ATTRIBUTE_VALUE_UNQUOTED_STATE); } # ">" (U+003E) elseif ($char === '>') { # Switch to the data state. Emit the current tag token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); $this->sanitizeTag($token); return $token; } @@ -1893,7 +1854,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the current attribute's value. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= "\u{FFFD}"; } # U+0022 QUOTATION MARK (") @@ -1921,7 +1881,6 @@ class Tokenizer { // OPTIMIZATION: Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($attribute) && $attribute instanceof TokenAttr); $attribute->value .= $char.$this->data->consumeUntil("\t\n\x0c &>\0\"'<=`"); } } @@ -1949,7 +1908,6 @@ class Tokenizer { # Switch to the data state. # Emit the current tag token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); $this->sanitizeTag($token); return $token; } @@ -1980,7 +1938,6 @@ class Tokenizer { # Set the self-closing flag of the current tag token. # Switch to the data state. # Emit the current tag token. - assert(isset($token) && $token instanceof Token); $token->selfClosing = true; $this->state = self::DATA_STATE; $this->sanitizeTag($token); @@ -2013,7 +1970,6 @@ class Tokenizer { # Switch to the data state. # Emit the comment token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # EOF @@ -2026,7 +1982,6 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # U+0000 NULL @@ -2034,7 +1989,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the comment token's data. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->data .= "\u{FFFD}"; } # Anything else @@ -2044,7 +1998,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->data .= $char.$this->data->consumeUntil(">\0"); } } @@ -2119,7 +2072,6 @@ class Tokenizer { # Emit the comment token. $this->error(ParseError::ABRUPT_CLOSING_OF_EMPTY_COMMENT); $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # Anything else @@ -2147,7 +2099,6 @@ class Tokenizer { # Emit the comment token. $this->error(ParseError::ABRUPT_CLOSING_OF_EMPTY_COMMENT); $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # EOF @@ -2161,14 +2112,12 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else else { # Append a U+002D HYPHEN-MINUS character (-) to the comment token's data. # Reconsume in the comment state. - assert(isset($token) && $token instanceof Token); $token->data .= '-'; $this->data->unconsume(); $this->state = self::COMMENT_STATE; @@ -2184,7 +2133,6 @@ class Tokenizer { if ($char === '<') { # Append the current input character to the comment token's data. # Switch to the comment less-than sign state. - assert(isset($token) && $token instanceof Token); $token->data .= $char; $this->state = self::COMMENT_LESS_THAN_SIGN_STATE; } @@ -2198,7 +2146,6 @@ class Tokenizer { # This is an unexpected-null-character parse error. # Append a U+FFFD REPLACEMENT CHARACTER character to the comment token's data. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->data .= "\u{FFFD}"; } # EOF @@ -2212,7 +2159,6 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else @@ -2222,7 +2168,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->data .= $char.$this->data->consumeUntil("<-\0"); } } @@ -2236,14 +2181,12 @@ class Tokenizer { if ($char === '!') { # Append the current input character to the comment token's data. # Switch to the comment less-than sign bang state. - assert(isset($token) && $token instanceof Token); $token->data .= $char; $this->state = self::COMMENT_LESS_THAN_SIGN_BANG_STATE; } # U+003C LESS-THAN SIGN (<) elseif ($char ==='<') { # Append the current input character to the comment token's data. - assert(isset($token) && $token instanceof Token); $token->data .= $char; } # Anything else @@ -2333,14 +2276,12 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else else { # Append a "-" (U+002D) character to the comment token's data. # Reconsume in the comment state. - assert(isset($token) && $token instanceof Token); $token->data .= '-'; $this->state = self::COMMENT_STATE; $this->data->unconsume(); @@ -2357,7 +2298,6 @@ class Tokenizer { # Switch to the data state. # Emit the comment token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # "!" (U+0021) @@ -2385,14 +2325,12 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else else { # Append two U+002D HYPHEN-MINUS characters (-) to the comment token's data. # Reconsume in the comment state. - assert(isset($token) && $token instanceof Token); $token->data .= '--'; $this->state = self::COMMENT_STATE; $this->data->unconsume(); @@ -2410,7 +2348,6 @@ class Tokenizer { # and a U+0021 EXCLAMATION MARK character (!) # to the comment token's data. # Switch to the comment end dash state. - assert(isset($token) && $token instanceof Token); $token->data .= '--!'; $this->state = self::COMMENT_END_DASH_STATE; } @@ -2421,7 +2358,6 @@ class Tokenizer { # Emit the comment token. $this->error(ParseError::INCORRECTLY_CLOSED_COMMENT); $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # EOF @@ -2435,7 +2371,6 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else @@ -2444,8 +2379,7 @@ class Tokenizer { # and a U+0021 EXCLAMATION MARK character (!) # to the comment token's data. # Reconsume in the comment state. - assert(isset($token) && $token instanceof Token); - $token->data .= '--!'.$char; + $token->data .= '--!'; $this->state = self::COMMENT_STATE; $this->data->unconsume(); } @@ -2479,7 +2413,6 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else @@ -2487,7 +2420,7 @@ class Tokenizer { # This is a missing-whitespace-before-doctype-name parse error. # Reconsume in the before DOCTYPE name state. $this->error(ParseError::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME); - $this->state = self::DOCTYPE_NAME_STATE; + $this->state = self::BEFORE_DOCTYPE_NAME_STATE; $this->data->unconsume(); } } @@ -2577,7 +2510,6 @@ class Tokenizer { # Switch to the data state. # Emit the current DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } // See below for ASCII upper alpha @@ -2587,7 +2519,6 @@ class Tokenizer { # Append a U+FFFD REPLACEMENT CHARACTER character # to the current DOCTYPE token's name. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->name .= "\u{FFFD}"; } # EOF @@ -2597,7 +2528,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2615,7 +2545,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->name .= strtolower($char.$this->data->consumeUntil("\t\n\x0c >\0")); } } @@ -2637,7 +2566,6 @@ class Tokenizer { # Switch to the data state. # Emit the current DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # EOF @@ -2647,7 +2575,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2681,11 +2608,12 @@ class Tokenizer { # parse error. # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. - $this->error(ParseError::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME); - assert(isset($token) && $token instanceof Token); - $token->forceQuirks = true; - $this->state = self::BOGUS_DOCTYPE_STATE; - $this->data->unconsume(); + else { + $this->error(ParseError::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME); + $token->forceQuirks = true; + $this->state = self::BOGUS_DOCTYPE_STATE; + $this->data->unconsume(); + } } } @@ -2708,7 +2636,6 @@ class Tokenizer { # Set the DOCTYPE token's public identifier to the empty string (not missing), # then switch to the DOCTYPE public identifier (double-quoted) state. $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD); - assert(isset($token) && $token instanceof Token); $token->public = ''; $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE; } @@ -2718,7 +2645,6 @@ class Tokenizer { # Set the DOCTYPE token's public identifier to the empty string (not missing), # then switch to the DOCTYPE public identifier (single-quoted) state. $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD); - assert(isset($token) && $token instanceof Token); $token->public = ''; $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE; } @@ -2729,7 +2655,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::MISSING_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -2741,7 +2666,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2756,7 +2680,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); @@ -2779,7 +2702,6 @@ class Tokenizer { elseif ($char === '"') { # Set the DOCTYPE token's public identifier to the empty string (not missing), # then switch to the DOCTYPE public identifier (double-quoted) state. - assert(isset($token) && $token instanceof Token); $token->public = ''; $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE; } @@ -2787,7 +2709,6 @@ class Tokenizer { elseif ($char === "'") { # Set the DOCTYPE token's public identifier to the empty string (not missing), # then switch to the DOCTYPE public identifier (single-quoted) state. - assert(isset($token) && $token instanceof Token); $token->public = ''; $this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE; } @@ -2798,7 +2719,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::MISSING_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -2810,7 +2730,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2825,7 +2744,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); @@ -2848,7 +2766,6 @@ class Tokenizer { # Append a U+FFFD REPLACEMENT CHARACTER character # to the current DOCTYPE token's public identifier. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->public .= "\u{FFFD}"; } # ">" (U+003E) @@ -2858,7 +2775,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -2870,7 +2786,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2887,7 +2802,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->public .= $char.$this->data->consumeUntil("\">\0"); } } @@ -2908,7 +2822,6 @@ class Tokenizer { # Append a U+FFFD REPLACEMENT CHARACTER character # to the current DOCTYPE token's public identifier. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->public .= "\u{FFFD}"; } # ">" (U+003E) @@ -2918,7 +2831,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -2930,7 +2842,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -2947,7 +2858,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->public .= $char.$this->data->consumeUntil("'>\0"); } } @@ -2970,7 +2880,6 @@ class Tokenizer { # Switch to the data state. # Emit the current DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # U+0022 QUOTATION MARK (") @@ -2998,7 +2907,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3013,7 +2921,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); @@ -3037,7 +2944,6 @@ class Tokenizer { # Switch to the data state. # Emit the current DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # U+0022 QUOTATION MARK (") @@ -3063,7 +2969,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3078,7 +2983,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; } @@ -3103,7 +3007,6 @@ class Tokenizer { # Set the DOCTYPE token's system identifier to the empty string (not missing), # then switch to the DOCTYPE system identifier (double-quoted) state. $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD); - assert(isset($token) && $token instanceof Token); $token->system = ''; $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; } @@ -3113,7 +3016,6 @@ class Tokenizer { # Set the DOCTYPE token's system identifier to the empty string (not missing), # then switch to the DOCTYPE system identifier (single-quoted) state. $this->error(ParseError::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD); - assert(isset($token) && $token instanceof Token); $token->system = ''; $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; } @@ -3124,7 +3026,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::MISSING_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -3136,7 +3037,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3151,7 +3051,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); @@ -3175,7 +3074,6 @@ class Tokenizer { # Set the DOCTYPE token's system identifier to the # empty string (not missing), then switch to the # DOCTYPE system identifier (double-quoted) state. - assert(isset($token) && $token instanceof Token); $token->system = ''; $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; } @@ -3184,7 +3082,6 @@ class Tokenizer { # Set the DOCTYPE token's system identifier to the # empty string (not missing), then switch to the # DOCTYPE system identifier (single-quoted) state. - assert(isset($token) && $token instanceof Token); $token->system = ''; $this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; } @@ -3195,7 +3092,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::MISSING_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -3207,7 +3103,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3222,7 +3117,6 @@ class Tokenizer { # Set the DOCTYPE token's force-quirks flag to on. # Reconsume in the bogus DOCTYPE state. $this->error(ParseError::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); @@ -3245,7 +3139,6 @@ class Tokenizer { # Append a U+FFFD REPLACEMENT CHARACTER character # to the current DOCTYPE token's system identifier. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->system .= "\u{FFFD}"; } # ">" (U+003E) @@ -3255,7 +3148,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -3267,7 +3159,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3283,7 +3174,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->system .= $char.$this->data->consumeUntil("\"\0>"); } } @@ -3304,7 +3194,6 @@ class Tokenizer { # Append a U+FFFD REPLACEMENT CHARACTER character # to the current DOCTYPE token's system identifier. $this->error(ParseError::UNEXPECTED_NULL_CHARACTER); - assert(isset($token) && $token instanceof Token); $token->system .= "\u{FFFD}"; } # ">" (U+003E) @@ -3314,7 +3203,6 @@ class Tokenizer { # Switch to the data state. # Emit that DOCTYPE token. $this->error(ParseError::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; $this->state = self::DATA_STATE; return $token; @@ -3326,7 +3214,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3342,7 +3229,6 @@ class Tokenizer { // OPTIMIZATION: // Consume all characters that aren't listed above to prevent having // to loop back through here every single time. - assert(isset($token) && $token instanceof Token); $token->system .= $char.$this->data->consumeUntil("'\0>"); } } @@ -3364,7 +3250,6 @@ class Tokenizer { # Switch to the data state. # Emit the current DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # EOF @@ -3374,7 +3259,6 @@ class Tokenizer { # Emit that DOCTYPE token. # Emit an end-of-file token. $this->error(ParseError::EOF_IN_DOCTYPE); - assert(isset($token) && $token instanceof Token); $token->forceQuirks = true; // DEVIATION: // We cannot emit two tokens, so we switch to @@ -3389,7 +3273,6 @@ class Tokenizer { # Reconsume in the bogus DOCTYPE state. # (This does not set the DOCTYPE token's force-quirks flag to on.) $this->error(ParseError::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, $char); - assert(isset($token) && $token instanceof Token); $this->state = self::BOGUS_DOCTYPE_STATE; $this->data->unconsume(); } @@ -3405,7 +3288,6 @@ class Tokenizer { # Switch to the data state. # Emit the DOCTYPE token. $this->state = self::DATA_STATE; - assert(isset($token) && $token instanceof Token); return $token; } # U+0000 NULL @@ -3424,7 +3306,6 @@ class Tokenizer { // the data state, which will emit the EOF token $this->state = self::DATA_STATE; $this->data->unconsume(); - assert(isset($token) && $token instanceof Token); return $token; } # Anything else