A modern, accurate HTML parser and serializer for PHP
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

168 lines
16 KiB

<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\Parser;
/**
 * Catalogue of HTML parse-error codes and their message templates.
 *
 * Codes 100-148 identify tokenization errors and codes 200-219 identify
 * tree-construction errors. Every code has a matching entry in
 * {@see ParseError::MESSAGES}; templates that contain "%s" placeholders
 * are printf-style format strings.
 */
class ParseError {
    // tokenization parse errors; these have been standardized
    public const ENCODING_ERROR = 100;
    public const UNEXPECTED_NULL_CHARACTER = 101;
    public const UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME = 102;
    public const EOF_BEFORE_TAG_NAME = 103;
    public const INVALID_FIRST_CHARACTER_OF_TAG_NAME = 104;
    public const MISSING_END_TAG_NAME = 105;
    public const EOF_IN_TAG = 106;
    public const EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT = 107;
    public const UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME = 108;
    public const DUPLICATE_ATTRIBUTE = 109;
    public const UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME = 110;
    public const MISSING_ATTRIBUTE_VALUE = 111;
    public const UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE = 112;
    public const MISSING_WHITESPACE_BETWEEN_ATTRIBUTES = 113;
    public const UNEXPECTED_SOLIDUS_IN_TAG = 114;
    public const CDATA_IN_HTML_CONTENT = 115;
    public const INCORRECTLY_OPENED_COMMENT = 116;
    public const ABRUPT_CLOSING_OF_EMPTY_COMMENT = 117;
    public const EOF_IN_COMMENT = 118;
    public const NESTED_COMMENT = 119;
    public const INCORRECTLY_CLOSED_COMMENT = 120;
    public const EOF_IN_DOCTYPE = 121;
    public const MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME = 122;
    public const MISSING_DOCTYPE_NAME = 123;
    public const INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME = 124;
    public const MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 125;
    public const MISSING_DOCTYPE_PUBLIC_IDENTIFIER = 126;
    public const MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 127;
    public const ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER = 128;
    public const MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 129;
    public const MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 130;
    public const MISSING_DOCTYPE_SYSTEM_IDENTIFIER = 131;
    public const MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 132;
    public const ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER = 133;
    public const UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 134;
    public const EOF_IN_CDATA = 135;
    public const END_TAG_WITH_ATTRIBUTES = 136;
    public const END_TAG_WITH_TRAILING_SOLIDUS = 137;
    public const MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE = 138;
    public const UNKNOWN_NAMED_CHARACTER_REFERENCE = 139;
    public const ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE = 140;
    public const NULL_CHARACTER_REFERENCE = 141;
    public const CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE = 142;
    public const SURROGATE_CHARACTER_REFERENCE = 143;
    public const NONCHARACTER_CHARACTER_REFERENCE = 144;
    public const CONTROL_CHARACTER_REFERENCE = 145;
    public const SURROGATE_IN_INPUT_STREAM = 146;
    public const NONCHARACTER_IN_INPUT_STREAM = 147;
    public const CONTROL_CHARACTER_IN_INPUT_STREAM = 148;

    // tree construction parse errors; these have not been standardized, but html5lib's error names are likely to become standard in future
    public const EXPECTED_DOCTYPE_BUT_GOT_START_TAG = 200;
    public const EXPECTED_DOCTYPE_BUT_GOT_END_TAG = 201;
    public const EXPECTED_DOCTYPE_BUT_GOT_CHARS = 202;
    public const EXPECTED_DOCTYPE_BUT_GOT_EOF = 203;
    public const UNKNOWN_DOCTYPE = 204;
    public const UNEXPECTED_DOCTYPE = 205;
    public const UNEXPECTED_START_TAG = 206;
    public const UNEXPECTED_END_TAG = 207; // html5lib also uses 'adoption-agency-1.2' and 'adoption-agency-1.3' for this
    public const NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS = 208;
    public const UNEXPECTED_START_TAG_IMPLIES_END_TAG = 209;
    public const UNEXPECTED_START_TAG_ALIAS = 210; // html5lib uses 'unexpected-start-tag-treated-as'
    public const UNEXPECTED_CHAR = 211;
    public const UNEXPECTED_EOF = 212;
    public const UNEXPECTED_PARENT = 213;
    public const INVALID_NAMESPACE_ATTRIBUTE_VALUE = 214;
    public const FOSTERED_START_TAG = 215;
    public const FOSTERED_END_TAG = 216;
    public const FOSTERED_CHAR = 217;
    public const UNEXPECTED_NULL_CHARACTER_OMIT = 218;
    public const UNEXPECTED_NULL_CHARACTER_REPLACE = 219;

    /**
     * Human-readable message template for each error code.
     *
     * Keys are the integer error codes declared above; values are message
     * strings, some of which carry one or two "%s" placeholders.
     */
    public const MESSAGES = [
        // tree construction errors
        self::EXPECTED_DOCTYPE_BUT_GOT_START_TAG => 'Expected DOCTYPE but got start tag <%s>',
        self::EXPECTED_DOCTYPE_BUT_GOT_END_TAG => 'Expected DOCTYPE but got end tag </%s>',
        self::EXPECTED_DOCTYPE_BUT_GOT_CHARS => 'Expected DOCTYPE but got characters',
        self::EXPECTED_DOCTYPE_BUT_GOT_EOF => 'Expected DOCTYPE but got end-of-file',
        self::UNKNOWN_DOCTYPE => 'Unknown DOCTYPE',
        self::UNEXPECTED_DOCTYPE => 'Unexpected DOCTYPE',
        self::UNEXPECTED_START_TAG => 'Unexpected start tag <%s>',
        self::UNEXPECTED_END_TAG => 'Unexpected end tag </%s>',
        self::NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS => 'Trailing solidus in non-void HTML element start tag <%s>',
        self::UNEXPECTED_START_TAG_IMPLIES_END_TAG => 'Unexpected non-nesting start tag <%s> in nested context',
        self::UNEXPECTED_START_TAG_ALIAS => 'Start tag <%s> should be <%s>',
        self::UNEXPECTED_CHAR => 'Unexpected character data',
        self::UNEXPECTED_EOF => 'Unexpected end of file',
        self::UNEXPECTED_PARENT => 'Start tag <%s> not valid in parent <%s>',
        self::INVALID_NAMESPACE_ATTRIBUTE_VALUE => 'Invalid value for attribute "%s"; it must have value "%s" or be omitted',
        self::FOSTERED_START_TAG => 'Start tag <%s> moved to before table',
        self::FOSTERED_END_TAG => 'End tag </%s> moved to before table',
        self::FOSTERED_CHAR => 'Character moved to before table',
        self::UNEXPECTED_NULL_CHARACTER_OMIT => 'Unexpected null character; omitted from document',
        self::UNEXPECTED_NULL_CHARACTER_REPLACE => 'Unexpected null character; replacement character substituted',
        // tokenization errors
        self::ENCODING_ERROR => 'Corrupt encoding near byte position %s',
        self::UNEXPECTED_NULL_CHARACTER => 'Unexpected null character',
        self::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME => 'Unexpected "?" character instead of tag name',
        self::EOF_BEFORE_TAG_NAME => 'End-of-file before tag name',
        self::INVALID_FIRST_CHARACTER_OF_TAG_NAME => 'Invalid first character "%s" of tag name',
        self::MISSING_END_TAG_NAME => 'Missing end-tag name',
        self::EOF_IN_TAG => 'End-of-file in tag',
        self::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT => 'End-of-file in script (HTML comment-like) text',
        self::UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME => 'Unexpected equals sign before attribute name',
        self::DUPLICATE_ATTRIBUTE => 'Duplicate attribute "%s" in start tag',
        self::UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME => 'Unexpected character "%s" in attribute name',
        self::MISSING_ATTRIBUTE_VALUE => 'Missing attribute value',
        self::UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE => 'Unexpected character "%s" in unquoted attribute value',
        self::MISSING_WHITESPACE_BETWEEN_ATTRIBUTES => 'Missing whitespace between attributes',
        self::UNEXPECTED_SOLIDUS_IN_TAG => 'Unexpected solidus in tag',
        self::CDATA_IN_HTML_CONTENT => 'CDATA in HTML content',
        self::INCORRECTLY_OPENED_COMMENT => 'Incorrectly opened comment',
        self::ABRUPT_CLOSING_OF_EMPTY_COMMENT => 'Abrupt closing of empty comment',
        self::EOF_IN_COMMENT => 'End-of-file in comment',
        self::NESTED_COMMENT => 'Nested comment',
        self::INCORRECTLY_CLOSED_COMMENT => 'Incorrectly closed comment',
        self::EOF_IN_DOCTYPE => 'End-of-file in DOCTYPE',
        self::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME => 'Missing whitespace before DOCTYPE name',
        self::MISSING_DOCTYPE_NAME => 'Missing DOCTYPE name',
        self::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME => 'Invalid character sequence after DOCTYPE name',
        self::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD => 'Missing whitespace after DOCTYPE "PUBLIC" keyword',
        self::MISSING_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing DOCTYPE "PUBLIC" identifier',
        self::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER => 'Missing quote before DOCTYPE "PUBLIC" identifier',
        self::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER => 'Abrupt DOCTYPE "PUBLIC" identifier',
        self::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS => 'Missing whitespace between DOCTYPE "PUBLIC" and "SYSTEM" identifiers',
        self::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD => 'Missing whitespace after DOCTYPE "SYSTEM" keyword',
        self::MISSING_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing DOCTYPE "SYSTEM" identifier',
        self::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER => 'Missing quote before DOCTYPE "SYSTEM" identifier',
        self::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER => 'Abrupt DOCTYPE "SYSTEM" identifier',
        self::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER => 'Unexpected character "%s" after DOCTYPE "SYSTEM" identifier',
        self::EOF_IN_CDATA => 'End-of-file in CDATA section',
        self::END_TAG_WITH_ATTRIBUTES => 'End-tag with attributes',
        self::END_TAG_WITH_TRAILING_SOLIDUS => 'End-tag with trailing solidus',
        self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 'Missing semicolon after character reference',
        self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 'Unknown named character reference "%s"',
        self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 'Absence of digits in character reference',
        self::NULL_CHARACTER_REFERENCE => 'Null character reference',
        self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 'Character reference outside Unicode range',
        self::SURROGATE_CHARACTER_REFERENCE => 'Surrogate character reference',
        self::NONCHARACTER_CHARACTER_REFERENCE => 'Non-character character reference',
        self::CONTROL_CHARACTER_REFERENCE => 'Control-character character reference',
        self::SURROGATE_IN_INPUT_STREAM => 'Surrogate character in input stream',
        self::NONCHARACTER_IN_INPUT_STREAM => 'Non-character character in input stream',
        self::CONTROL_CHARACTER_IN_INPUT_STREAM => 'Control character in input stream',
    ];

    /**
     * Per-error integer offsets, keyed by error code; every listed code
     * maps to 1.
     *
     * NOTE(review): presumably an adjustment applied to the reported
     * position of these errors; confirm against the code that reads
     * this table, which is not part of this class.
     */
    public const REPORT_OFFSETS = [
        self::INCORRECTLY_OPENED_COMMENT => 1,
        self::SURROGATE_CHARACTER_REFERENCE => 1,
        self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 1,
        self::NONCHARACTER_CHARACTER_REFERENCE => 1,
        self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 1,
        self::NULL_CHARACTER_REFERENCE => 1,
        self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 1,
        self::CONTROL_CHARACTER_REFERENCE => 1,
        self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 1,
    ];

    /**
     * Starts out as an empty array.
     *
     * NOTE(review): presumably accumulates the errors reported during a
     * parse; nothing in this class writes to it, so verify against callers.
     * Deliberately left untyped so existing assignments keep working.
     */
    public $errors = [];
}