|
|
@ -14,77 +14,77 @@ class Tokenizer { |
|
|
|
|
|
|
|
public static $debug = false; |
|
|
|
|
|
|
|
const DATA_STATE = 0; |
|
|
|
const RCDATA_STATE = 1; |
|
|
|
const RAWTEXT_STATE = 2; |
|
|
|
const SCRIPT_DATA_STATE = 3; |
|
|
|
const PLAINTEXT_STATE = 4; |
|
|
|
const TAG_OPEN_STATE = 5; |
|
|
|
const END_TAG_OPEN_STATE = 6; |
|
|
|
const TAG_NAME_STATE = 7; |
|
|
|
const RCDATA_LESS_THAN_SIGN_STATE = 8; |
|
|
|
const RCDATA_END_TAG_OPEN_STATE = 9; |
|
|
|
const RCDATA_END_TAG_NAME_STATE = 10; |
|
|
|
const RAWTEXT_LESS_THAN_SIGN_STATE = 11; |
|
|
|
const RAWTEXT_END_TAG_OPEN_STATE = 12; |
|
|
|
const RAWTEXT_END_TAG_NAME_STATE = 13; |
|
|
|
const SCRIPT_DATA_LESS_THAN_SIGN_STATE = 14; |
|
|
|
const SCRIPT_DATA_END_TAG_OPEN_STATE = 15; |
|
|
|
const SCRIPT_DATA_END_TAG_NAME_STATE = 16; |
|
|
|
const SCRIPT_DATA_ESCAPE_START_STATE = 17; |
|
|
|
const SCRIPT_DATA_ESCAPE_START_DASH_STATE = 18; |
|
|
|
const SCRIPT_DATA_ESCAPED_STATE = 19; |
|
|
|
const SCRIPT_DATA_ESCAPED_DASH_STATE = 20; |
|
|
|
const SCRIPT_DATA_ESCAPED_DASH_DASH_STATE = 21; |
|
|
|
const SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE = 22; |
|
|
|
const SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE = 23; |
|
|
|
const SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE = 24; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE = 25; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_STATE = 26; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE = 27; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE = 28; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE = 29; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE = 30; |
|
|
|
const BEFORE_ATTRIBUTE_NAME_STATE = 31; |
|
|
|
const ATTRIBUTE_NAME_STATE = 32; |
|
|
|
const AFTER_ATTRIBUTE_NAME_STATE = 33; |
|
|
|
const BEFORE_ATTRIBUTE_VALUE_STATE = 34; |
|
|
|
const ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE = 35; |
|
|
|
const ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE = 36; |
|
|
|
const ATTRIBUTE_VALUE_UNQUOTED_STATE = 37; |
|
|
|
const AFTER_ATTRIBUTE_VALUE_QUOTED_STATE = 38; |
|
|
|
const SELF_CLOSING_START_TAG_STATE = 39; |
|
|
|
const BOGUS_COMMENT_STATE = 40; |
|
|
|
const MARKUP_DECLARATION_OPEN_STATE = 41; |
|
|
|
const COMMENT_START_STATE = 42; |
|
|
|
const COMMENT_START_DASH_STATE = 43; |
|
|
|
const COMMENT_STATE = 44; |
|
|
|
const COMMENT_LESS_THAN_SIGN_STATE = 45; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_STATE = 46; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE = 47; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE = 48; |
|
|
|
const COMMENT_END_DASH_STATE = 49; |
|
|
|
const COMMENT_END_STATE = 50; |
|
|
|
const COMMENT_END_BANG_STATE = 51; |
|
|
|
const DOCTYPE_STATE = 52; |
|
|
|
const BEFORE_DOCTYPE_NAME_STATE = 53; |
|
|
|
const DOCTYPE_NAME_STATE = 54; |
|
|
|
const AFTER_DOCTYPE_NAME_STATE = 55; |
|
|
|
const AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE = 56; |
|
|
|
const BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 57; |
|
|
|
const DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE = 58; |
|
|
|
const DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE = 59; |
|
|
|
const AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 60; |
|
|
|
const BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE = 61; |
|
|
|
const AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE = 62; |
|
|
|
const BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 63; |
|
|
|
const DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE = 64; |
|
|
|
const DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE = 65; |
|
|
|
const AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 66; |
|
|
|
const BOGUS_DOCTYPE_STATE = 67; |
|
|
|
const CDATA_SECTION_STATE = 68; |
|
|
|
const CDATA_SECTION_BRACKET_STATE = 69; |
|
|
|
const CDATA_SECTION_END_STATE = 70; |
|
|
|
const DATA_STATE = 1; |
|
|
|
const RCDATA_STATE = 2; |
|
|
|
const RAWTEXT_STATE = 3; |
|
|
|
const SCRIPT_DATA_STATE = 4; |
|
|
|
const PLAINTEXT_STATE = 5; |
|
|
|
const TAG_OPEN_STATE = 6; |
|
|
|
const END_TAG_OPEN_STATE = 7; |
|
|
|
const TAG_NAME_STATE = 8; |
|
|
|
const RCDATA_LESS_THAN_SIGN_STATE = 9; |
|
|
|
const RCDATA_END_TAG_OPEN_STATE = 10; |
|
|
|
const RCDATA_END_TAG_NAME_STATE = 11; |
|
|
|
const RAWTEXT_LESS_THAN_SIGN_STATE = 12; |
|
|
|
const RAWTEXT_END_TAG_OPEN_STATE = 13; |
|
|
|
const RAWTEXT_END_TAG_NAME_STATE = 14; |
|
|
|
const SCRIPT_DATA_LESS_THAN_SIGN_STATE = 15; |
|
|
|
const SCRIPT_DATA_END_TAG_OPEN_STATE = 16; |
|
|
|
const SCRIPT_DATA_END_TAG_NAME_STATE = 17; |
|
|
|
const SCRIPT_DATA_ESCAPE_START_STATE = 18; |
|
|
|
const SCRIPT_DATA_ESCAPE_START_DASH_STATE = 19; |
|
|
|
const SCRIPT_DATA_ESCAPED_STATE = 20; |
|
|
|
const SCRIPT_DATA_ESCAPED_DASH_STATE = 21; |
|
|
|
const SCRIPT_DATA_ESCAPED_DASH_DASH_STATE = 22; |
|
|
|
const SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE = 23; |
|
|
|
const SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE = 24; |
|
|
|
const SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE = 25; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE = 26; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_STATE = 27; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE = 28; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE = 29; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE = 30; |
|
|
|
const SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE = 31; |
|
|
|
const BEFORE_ATTRIBUTE_NAME_STATE = 32; |
|
|
|
const ATTRIBUTE_NAME_STATE = 33; |
|
|
|
const AFTER_ATTRIBUTE_NAME_STATE = 34; |
|
|
|
const BEFORE_ATTRIBUTE_VALUE_STATE = 35; |
|
|
|
const ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE = 36; |
|
|
|
const ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE = 37; |
|
|
|
const ATTRIBUTE_VALUE_UNQUOTED_STATE = 38; |
|
|
|
const AFTER_ATTRIBUTE_VALUE_QUOTED_STATE = 39; |
|
|
|
const SELF_CLOSING_START_TAG_STATE = 40; |
|
|
|
const BOGUS_COMMENT_STATE = 41; |
|
|
|
const MARKUP_DECLARATION_OPEN_STATE = 42; |
|
|
|
const COMMENT_START_STATE = 43; |
|
|
|
const COMMENT_START_DASH_STATE = 44; |
|
|
|
const COMMENT_STATE = 45; |
|
|
|
const COMMENT_LESS_THAN_SIGN_STATE = 46; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_STATE = 47; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE = 48; |
|
|
|
const COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE = 49; |
|
|
|
const COMMENT_END_DASH_STATE = 50; |
|
|
|
const COMMENT_END_STATE = 51; |
|
|
|
const COMMENT_END_BANG_STATE = 52; |
|
|
|
const DOCTYPE_STATE = 53; |
|
|
|
const BEFORE_DOCTYPE_NAME_STATE = 54; |
|
|
|
const DOCTYPE_NAME_STATE = 55; |
|
|
|
const AFTER_DOCTYPE_NAME_STATE = 56; |
|
|
|
const AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE = 57; |
|
|
|
const BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 58; |
|
|
|
const DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE = 59; |
|
|
|
const DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE = 60; |
|
|
|
const AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 61; |
|
|
|
const BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE = 62; |
|
|
|
const AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE = 63; |
|
|
|
const BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 64; |
|
|
|
const DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE = 65; |
|
|
|
const DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE = 66; |
|
|
|
const AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 67; |
|
|
|
const BOGUS_DOCTYPE_STATE = 68; |
|
|
|
const CDATA_SECTION_STATE = 69; |
|
|
|
const CDATA_SECTION_BRACKET_STATE = 70; |
|
|
|
const CDATA_SECTION_END_STATE = 71; |
|
|
|
|
|
|
|
const STATE_NAMES = [ |
|
|
|
self::DATA_STATE => "Data", |
|
|
|