HTML-DOM/lib/Data.php
J. King 6b42f08fbc Change some if-the-exception blocks to assertions
This has only been done some parts of the code that are internal
to the parser at large.
2019-12-12 17:35:24 -05:00

520 lines
39 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
declare(strict_types=1);
namespace dW\HTML5;
class Data {
use ParseErrorEmitter;
// Used to get the file path for error reporting.
public $filePath;
// Internal storage for the Intl data object.
protected $data;
// Used for error reporting to display line number.
protected $_line = 1;
// Used for error reporting to display column number.
protected $_column = 0;
// Used for error reporting when unconsuming to calculate column number from
// last newline.
protected $newlines = [];
// Used for debugging to print out information as data is consumed.
public static $debug = false;
const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
const DIGIT = '0123456789';
const HEX = '0123456789ABCDEFabcdef';
const WHITESPACE = "\t\n\x0c\x0d ";
public function __construct(string $data, string $filePath = 'STDIN', ParseError $errorHandler = null) {
$this->errorHandler = $errorHandler ?? new ParseError;
if ($filePath !== 'STDIN') {
$this->filePath = realpath($filePath);
$data = file_get_contents($this->filePath);
} else {
$this->filePath = $filePath;
}
// DEVIATION: The spec has steps for parsing and determining the character
// encoding. At this moment this implementation won't determine a character
// encoding and will just assume UTF-8.
# One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present
# in the input stream.
# Note: The handling of U+0000 NULL characters varies based on where the
# characters are found. In general, they are ignored except where doing so could
# plausibly introduce an attack vector. This handling is, by necessity, spread
# across both the tokenization stage and the tree construction stage.
// DEVIATION: Just going to remove NULL characters. There is no scripting involved
// in this implementation and therefore no attack vector possible due to it.
$data = preg_replace(['/^\xEF\xBB\xBF/','/\x00/'], '', $data);
// Won't provide line or column counts for this as it's done before that
// information is available. It will be rare that this is triggered.
$data = preg_replace_callback('/(?:[\x01-\x08\x0B\x0E-\x1F\x7F]|\xC2[\x80-\x9F]|\xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF])|\xEF\xB7[\x90-\xAF]|\xEF\xBF[\xBE\xBF]|[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF])/u', function($matches) {
$this->error(ParseError::INVALID_CONTROL_OR_NONCHARACTERS);
return '';
}, $data);
// Normalize line breaks. Convert CRLF and CR to LF.
// Break the string up into a traversable object.
$this->data = new \MensBeam\Intl\Encoding\UTF8(str_replace(["\r\n", "\r"], "\n", $data));
}
public function consume(int $length = 1): string {
assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
for ($i = 0, $string = ''; $i < $length; $i++) {
$char = $this->data->nextChar();
if ($char === "\n") {
$this->newlines[] = $this->data->posChar();
$this->_column = 1;
$this->_line++;
} else {
$this->_column++;
}
$string .= $char;
}
if (self::$debug) {
echo "\nConsume\n==========\n";
echo "Length: $length\n";
echo "Data: ";
var_export($string);
echo "\n";
echo "Pointer: {$this->data->posChar()}\n";
echo "==========\n\n";
}
return $string;
}
public function unconsume(int $length = 1) {
assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
$this->data->seek(0 - $length);
$string = $this->data->peekChar($length);
$numOfNewlines = substr_count($string, "\n");
if ($numOfNewlines > 0) {
$this->_line -= $numOfNewlines;
$count = $this->newlines;
$index = count($this->newlines) - ($numOfNewlines - 1);
$this->_column = 1 + (($count > 0 && isset($this->newlines[$index])) ? $this->data->posChar() - $this->newlines[$index] : $this->data->posChar());
} else {
$this->_column -= $length;
}
if (self::$debug) {
echo "\nUnconsume\n==========\n";
echo "Pointer: {$this->data->posChar()}\n";
echo "==========\n\n";
}
}
public function consumeWhile(string $match, int $limit = 0): string {
return $this->span($match, true, true, $limit);
}
public function consumeUntil(string $match, int $limit = 0): string {
return $this->span($match, false, true, $limit);
}
public function peek(int $length = 1): string {
assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
$string = $this->data->peekChar($length);
if (self::$debug) {
echo "\nPeek\n==========\n";
echo "Data: ";
var_export($string);
echo "\n";
echo "Pointer: {$this->data->posChar()}\n";
echo "==========\n\n";
}
return $string;
}
public function peekWhile(string $match, int $limit = 0): string {
return $this->span($match, true, false, $limit);
}
public function peekUntil(string $match, int $limit = 0): string {
return $this->span($match, false, false, $limit);
}
public function consumeCharacterReference(string $allowedCharacter = null, bool $inAttribute = false): string {
$char = $this->peek();
// OPTIMIZATION: When this spec states to return a character token of any kind this
// method will just return the character. The token will be emitted from
// Parser::parse() instead. Likewise, if the spec states to return nothing this
// method will instead return '&' because every single use of "tokenizing a
// character reference" in the spec this emits a '&' character token upon failure.
# The behavior depends on the identity of the next character (the one immediately
# after the U+0026 AMPERSAND character), as follows: U+0009 CHARACTER TABULATION
# (tab), U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+0020 SPACE, U+003C
# LESS-THAN SIGN, U+0026 AMPERSAND, EOF, The additional allowed character, if
# there is one. Not a character reference. No characters are consumed, and nothing
# is returned. (This is not an error, either.)
if ($char === "\x09" || $char === "x0A" || $char === "\x0C" || $char === ' ' || $char === '<' || $char === '&' || $char === '' || (!is_null($allowedCharacter) && $char === $allowedCharacter)) {
return '&';
}
# U+0023 NUMBER SIGN (#)
if ($char === '#') {
# Consume the U+0023 NUMBER SIGN.
$this->consume();
$char = $this->peek();
# The behavior further depends on the character after the U+0023 NUMBER SIGN:
# U+0078 LATIN SMALL LETTER X, U+0058 LATIN CAPITAL LETTER X
if ($char === 'x' || $char === 'X') {
# Consume the X.
$this->consume();
# Consume as many characters as match the range of ASCII hex digits.
$number = $this->consumeWhile(self::HEX);
# If no characters match the range, then don't consume any characters (and
# unconsume the U+0023 NUMBER SIGN character and, if appropriate, the X
# character). This is a parse error; nothing is returned.
if (!$number) {
$this->error(ParseError::ENTITY_UNEXPECTED_CHARACTER, $this->peek(), 'hexadecimal digit');
$this->unconsume(2);
return '&';
}
} else {
# Consume as many characters as match the range of ASCII digits.
$number = $this->consumeWhile(self::DIGIT);
# If no characters match the range, then don't consume any characters (and
# unconsume the U+0023 NUMBER SIGN character and, if appropriate, the X
# character). This is a parse error; nothing is returned.
if (!$number) {
$peek = $this->peek();
if ($peek !== '') {
$this->error(ParseError::ENTITY_UNEXPECTED_CHARACTER, $this->peek(), 'decimal digit');
} else {
$this->error(ParseError::UNEXPECTED_EOF);
}
$this->unconsume();
return '&';
}
}
# Otherwise, if the next character is a U+003B SEMICOLON, consume that too. If it
# isn't, there is a parse error.
$char = $this->peek();
if ($char === ';') {
$this->consume();
} elseif ($char === '') {
$this->error(ParseError::UNEXPECTED_EOF);
} else {
$this->error(ParseError::ENTITY_UNEXPECTED_CHARACTER, $char, 'semicolon terminator');
}
# If one or more characters match the range, then take them all and interpret the
# string of characters as a number (either hexadecimal or decimal as appropriate).
# If that number is one of the numbers in the first column of the following table,
# then this is a parse error. Find the row with that number in the first column,
# and return a character token for the Unicode character given in the second
# column of that row.
// DEVIATION: Because NULL characters are stripped from the document there's no
// sense in checking for them here.
switch ($number) {
# 0x80 U+20AC EURO SIGN (€)
case 0x80: $returnValue = '€';
break;
# 0x80 U+20AC EURO SIGN (€)
case 0x82: $returnValue = '';
break;
# 0x83 U+0192 LATIN SMALL LETTER F WITH HOOK (ƒ)
case 0x83: $returnValue = 'ƒ';
break;
# 0x84 U+201E DOUBLE LOW-9 QUOTATION MARK (&ldquor;)
case 0x84: $returnValue = '„';
break;
# 0x85 U+2026 HORIZONTAL ELLIPSIS (&mldr;)
case 0x85: $returnValue = '…';
break;
# 0x86 U+2020 DAGGER (†)
case 0x86: $returnValue = '†';
break;
# 0x87 U+2021 DOUBLE DAGGER (‡)
case 0x87: $returnValue = '‡';
break;
# 0x88 U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ)
case 0x88: $returnValue = 'ˆ';
break;
# 0x89 U+2030 PER MILLE SIGN (‰)
case 0x89: $returnValue = '‰';
break;
# 0x8A U+0160 LATIN CAPITAL LETTER S WITH CARON (Š)
case 0x8A: $returnValue = 'Š';
break;
# 0x8B U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK ()
case 0x8B: $returnValue = '';
break;
# 0x8C U+0152 LATIN CAPITAL LIGATURE OE (Œ)
case 0x8C: $returnValue = 'Œ';
break;
# 0x8E U+017D LATIN CAPITAL LETTER Z WITH CARON (&Zcaron;)
case 0x8E: $returnValue = 'Ž';
break;
# 0x91 U+2018 LEFT SINGLE QUOTATION MARK ()
case 0x91: $returnValue = '';
break;
# 0x92 U+2019 RIGHT SINGLE QUOTATION MARK (&rsquor;)
case 0x92: $returnValue = '';
break;
# 0x93 U+201C LEFT DOUBLE QUOTATION MARK (“)
case 0x93: $returnValue = '“';
break;
# 0x94 U+201D RIGHT DOUBLE QUOTATION MARK (”)
case 0x94: $returnValue = '”';
break;
# 0x95 U+2022 BULLET (&bullet;)
case 0x95: $returnValue = '•';
break;
# 0x96 U+2013 EN DASH ()
case 0x96: $returnValue = '';
break;
# 0x97 U+2014 EM DASH (—)
case 0x97: $returnValue = '—';
break;
# 0x98 U+02DC SMALL TILDE (˜)
case 0x98: $returnValue = '˜';
break;
# 0x99 U+2122 TRADE MARK SIGN (™)
case 0x99: $returnValue = '™';
break;
# 0x9A U+0161 LATIN SMALL LETTER S WITH CARON (š)
case 0x9A: $returnValue = 'š';
break;
# 0x9B U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ()
case 0x9B: $returnValue = '';
break;
# 0x9C U+0153 LATIN SMALL LIGATURE OE (œ)
case 0x9C: $returnValue = 'œ';
break;
# 0x9E U+017E LATIN SMALL LETTER Z WITH CARON (&zcaron;)
case 0x9E: $returnValue = 'ž';
break;
# 0x9F U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ)
case 0x9F: $returnValue = 'Ÿ';
break;
default : $returnValue = null;
}
if ($returnValue) {
$this->error(ParseError::INVALID_NUMERIC_ENTITY, $number);
// Consume the ampersand but return the value instead.
$this->consume();
return $returnValue;
}
# Otherwise, if the number is in the range 0xD800 to 0xDFFF or is greater than
# 0x10FFFF, then this is a parse error. Return a U+FFFD REPLACEMENT CHARACTER
# character token.
if (($number >= 0xD800 && $number <= 0xDFFF) || $number > 0x10FFFF) {
$this->error(ParseError::INVALID_CODEPOINT, $number);
return '<27>';
}
# Additionally, if the number is in the range 0x0001 to 0x0008, 0x000D to 0x001F,
# 0x007F to 0x009F, 0xFDD0 to 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF,
# 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
# 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF,
# 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
# 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, or 0x10FFFF, then this is a parse error.
if (($number >= 0x0001 && $number <= 0x0008) || ($number >= 0x000D && $number <= 0x001F) ||
($number >= 0x007F && $number <= 0x009F) || ($number >= 0xFDD0 && $number <= 0xFDEF) ||
$number === 0x000B || $number === 0xFFFE || $number === 0xFFFF || $number === 0x1FFFE ||
$number === 0x1FFFF || $number === 0x2FFFE || $number === 0x2FFFF || $number === 0x3FFFE ||
$number === 0x3FFFF || $number === 0x4FFFE || $number === 0x4FFFF || $number === 0x5FFFE ||
$number === 0x5FFFF || $number === 0x6FFFE || $number === 0x6FFFF || $number === 0x7FFFE ||
$number === 0x7FFFF || $number === 0x8FFFE || $number === 0x8FFFF || $number === 0x9FFFE ||
$number === 0x9FFFF || $number === 0xAFFFE || $number === 0xAFFFF || $number === 0xBFFFE ||
$number === 0xBFFFF || $number === 0xCFFFE || $number === 0xCFFFF || $number === 0xDFFFE ||
$number === 0xDFFFF || $number === 0xEFFFE || $number === 0xEFFFF || $number === 0xFFFFE ||
$number === 0xFFFFF || $number === 0x10FFFE || $number === 0x10FFFF) {
$this->error(ParseError::INVALID_CODEPOINT, $number);
// Consume the ampersand.
$this->consume();
return '&';
}
# Otherwise, return a character token for the Unicode character whose code point
# is that number.
return \MensBeam\Intl\Encoding\UTF8::encode((int) $number);
}
# Consume the maximum number of characters possible, with the consumed characters
# matching one of the identifiers in the first column of the named character
# references table (in a case-sensitive manner).
// Implementing this by peeking ahead 33 characters that match 0-9, A-Z, a-z, and
// ';'. 33 is the string length of the longest named character reference
// (calculated using `max(array_map('mb_strlen', array_keys($referenceTable)));`).
// It then checks the sequence of characters by checking them against a regular
// expression which is generated by a script that grabs the JSON of the character
// reference table from the spec and creates a somewhat optimized regular
// expression.
$sequence = static::peekWhile(self::DIGIT.self::ALPHA.';', 33);
if ($sequence !== '' && preg_match('/^(?:[Aa]acute(?:;)?|[Aa]breve;|acd;|acE;|[Aa]circ(?:;)?|acute(?:;)?|[Aa]cy;|[Aa][Ee]lig(?:;)?|[Aa]fr;|af;|[Aa]grave(?:;)?|alefsym;|aleph;|[Aa]lpha;|[Aa]macr;|amalg;|[aA][mM][pP](?:;)?|andand;|andd;|andslope;|andv;|[Aa]nd;|ange;|angmsdaa;|angmsdab;|angmsdac;|angmsdad;|angmsdae;|angmsdaf;|angmsdag;|angmsdah;|angmsd;|angrtvbd;|angrtvb;|angrt;|angsph;|angst;|angzarr;|[Aa]ogon;|[Aa]opf;|apacir;|ap[Ee];|apid;|apos;|ApplyFunction;|approxeq;|[Aa]ring(?:;)?|[Aa]scr;|Assign;|ast;|asympeq;|asymp;|[Aa]tilde(?:;)?|[Aa]uml(?:;)?|awconint;|awint;|backcong;|backepsilon;|backprime;|backsimeq;|backsim;|Backslash;|barvee;|Barv;|barwedge;|[bB]arwed;|bbrktbrk;|bbrk;|bcong;|[Bb]cy;|bdquo;|[bB]ecause;|becaus;|bemptyv;|bepsi;|Bernoullis;|bernou;|[Bb]eta;|beth;|between;|[Bb]fr;|bigcap;|bigcirc;|bigcup;|bigodot;|bigoplus;|bigotimes;|bigsqcup;|bigstar;|bigtriangledown;|bigtriangleup;|biguplus;|bigvee;|bigwedge;|bkarow;|blacklozenge;|blacksquare;|blacktriangledown;|blacktriangleleft;|blacktriangleright;|blacktriangle;|ac;|angle;|ang;|blank;|blk12;|blk14;|blk34;|block;|bnequiv;|bne;|b[Nn]ot;|[Bb]opf;|bottom;|bot;|bowtie;|boxbox;|box[dD][lL];|box[dD][rR];|box[hH][dD];|box[hH][uU];|box[hH];|boxminus;|boxplus;|boxtimes;|box[uU][lL];|box[uU][rR];|box[vV][hH];|box[vV][lL];|box[vV][rR];|box[vV];|bprime;|[bB]reve;|brvbar(?:;)?|[bB]scr;|bsemi;|bsime;|bsim;|bsolb;|bsolhsub;|bsol;|bullet;|bull;|[Bb]umpeq;|bump[Ee];|bump;|[Cc]acute;|capand;|capbrcup;|capcap;|capcup;|capdot;|CapitalDifferentialD;|caps;|[cC]ap;|caret;|[Cc]caron;|caron;|Cayleys;|ccaps;|[Cc]cedil(?:;)?|[Cc]circ;|Cconint;|ccupssm;|ccups;|[Cc]dot;|Cedilla;|cedil(?:;)?|cemptyv;|[cC]enter[dD]ot;|cent(?:;)?|[cC]fr;|[Cc][Hh]cy;|checkmark;|check;|[Cc]hi;|circeq;|circlearrowleft;|circlearrowright;|circledast;|circledcirc;|circleddash;|CircleDot;|circledR;|circledS;|CircleMinus;|CirclePlus;|CircleTimes;|circ;|cir[Ee];|cirfnint;|cirmid;|cirscir;|cir;|ClockwiseContourIntegral;|CloseCurlyDoubleQuote;|CloseCurlyQuote;|clubsuit;|clubs;|coloneq;|[Cc]olone;|[cC]olon;|commat;|comma;|compfn;|complement;|complexes;|comp;|congdot;|Congruent;|cong;|cwconint;|[cC]onint;|ContourIntegral;|[cC]opf;|Coproduct;|coprod;|copysr;|[cC][oO][pP][yY](?:;)?|CounterClockwiseContourIntegral;|crarr;|[cC]ross;|[Cc]scr;|csube;|csub;|csupe;|csup;|ctdot;|cudarrl;|cudarrr;|cuepr;|cuesc;|cularrp;|cularr;|cupbrcap;|[cC]up[cC]ap;|cupcup;|cupdot;|cupor;|cups;|[cC]up;|curarrm;|curarr;|curlyeqprec;|curlyeqsucc;|curlyvee;|curlywedge;|curren(?:;)?|curvearrowleft;|curvearrowright;|cuvee;|cuwed;|cwint;|cylcty;|[dD]agger;|daleth;|[dD][aA]rr;|[Dd]ashv;|dash;|dbkarow;|dblac;|[Dd]caron;|[Dd]cy;|ddagger;|ddarr;|DDotrahd;|ddotseq;|[Dd][Dd];|deg(?:;)?|[Dd]elta;|Del;|demptyv;|dfisht;|[Dd]fr;|dharl;|dharr;|dHar;|DiacriticalAcute;|DiacriticalDot;|DiacriticalDoubleAcute;|DiacriticalGrave;|DiacriticalTilde;|diamondsuit;|[dD]iamond;|diams;|diam;|die;|DifferentialD;|digamma;|disin;|divideontimes;|divide(?:;)?|divonx;|div;|[Dd][Jj]cy;|dlcorn;|dlcrop;|dollar;|[Dd]opf;|DotDot;|doteqdot;|DotEqual;|doteq;|dotminus;|dotplus;|dotsquare;|doublebarwedge;|DoubleContourIntegral;|DoubleDot;|[Dd]ot;|DoubleDownArrow;|DoubleLeftArrow;|DoubleLeftRightArrow;|DoubleLeftTee;|DoubleLongLeftArrow;|DoubleLongLeftRightArrow;|DoubleLongRightArrow;|DoubleRightArrow;|DoubleRightTee;|DoubleUpArrow;|DoubleUpDownArrow;|DoubleVerticalBar;|DownArrowBar;|DownArrowUpArrow;|downdownarrows;|[dD]own[aA]rrow;|DownBreve;|downharpoonleft;|downharpoonright;|DownLeftRightVector;|DownLeftTeeVector;|DownLeftVectorBar;|DownLeftVector;|DownRightTeeVector;|DownRightVectorBar;|DownRightVector;|DownTeeArrow;|DownTee;|drbkarow;|drcorn;|drcrop;|[Dd]scr;|[Dd][Ss]cy;|dsol;|[Dd]strok;|dtdot;|dtrif;|dtri;|duarr;|duhar;|dwangle;|[Dd][Zz]cy;|dzigrarr;|[Ee]acute(?:;)?|easter;|[Ee]caron;|[Ee]circ(?:;)?|ecir;|ecolon;|[Ee]cy;|eDDot;|[Ee][dD]ot;|ee;|efDot;|[Ee]fr;|[Ee]grave(?:;)?|egsdot;|egs;|eg;|Element;|elinters;|ell;|elsdot;|els;|el;|[Ee]macr;|emptyset;|EmptySmallSquare;|EmptyVerySmallSquare;|emptyv;|empty;|emsp13;|emsp14;|emsp;|[Ee][Nn][Gg];|ensp;|[Ee]ogon;|[Ee]opf;|eparsl;|epar;|eplus;|[Ee]psilon;|epsiv;|epsi;|eqcirc;|eqcolon;|eqsim;|eqslantgtr;|eqslantless;|equals;|EqualTilde;|Equal;|equest;|Equilibrium;|equivDD;|equiv;|eqvparsl;|erarr;|erDot;|[eE]scr;|esdot;|[Ee]sim;|[Ee]ta;|[Ee][Tt][Hh](?:;)?|[Ee]uml(?:;)?|euro;|excl;|Exists;|exist;|expectation;|[eE]xponential[eE];|fallingdotseq;|[Ff]cy;|female;|ffilig;|fflig;|ffllig;|[Ff]fr;|filig;|FilledSmallSquare;|FilledVerySmallSquare;|fjlig;|flat;|fllig;|fltns;|fnof;|[Ff]opf;|[fF]or[aA]ll;|forkv;|fork;|Fouriertrf;|fpartint;|frac12(?:;)?|frac13;|frac14(?:;)?|frac15;|frac16;|frac18;|frac23;|frac25;|frac34(?:;)?|frac35;|frac38;|frac45;|frac56;|frac58;|frac78;|frasl;|frown;|[fF]scr;|gacute;|[Gg]ammad;|[Gg]amma;|gap;|[Gg]breve;|Gcedil;|[Gg]circ;|[Gg]cy;|[Gg]dot;|g[Ee]l;|geqq;|geqslant;|geq;|gescc;|gesdotol;|gesdoto;|gesdot;|gesles;|gesl;|ges;|g[eE];|[Gg]fr;|ggg;|[gG]g;|gimel;|[Gg][Jj]cy;|gla;|glE;|glj;|gl;|gnapprox;|gnap;|gneqq;|gneq;|gn[eE];|gnsim;|[Gg]opf;|grave;|GreaterEqualLess;|GreaterEqual;|GreaterFullEqual;|GreaterGreater;|GreaterLess;|GreaterSlantEqual;|GreaterTilde;|[Gg]scr;|gsime;|gsiml;|gsim;|gtcc;|gtcir;|gtdot;|gtlPar;|gtquest;|gtrapprox;|gtrarr;|gtrdot;|gtreqless;|gtreqqless;|gtrless;|gtrsim;|[gG][tT](?:;)?|gvertneqq;|gvnE;|Hacek;|hairsp;|half;|hamilt;|[Hh][Aa][Rr][Dd]cy;|harrcir;|harrw;|h[aA]rr;|Hat;|hbar;|[Hh]circ;|heartsuit;|hearts;|hellip;|hercon;|[hH]fr;|HilbertSpace;|hksearow;|hkswarow;|hoarr;|homtht;|hookleftarrow;|hookrightarrow;|[hH]opf;|horbar;|HorizontalLine;|[hH]scr;|hslash;|[Hh]strok;|HumpDownHump;|HumpEqual;|hybull;|hyphen;|[Ii]acute(?:;)?|[Ii]circ(?:;)?|[Ii]cy;|ic;|Idot;|[Ii][Ee]cy;|iexcl(?:;)?|iff;|[iI]fr;|[Ii]grave(?:;)?|iiiint;|iiint;|iinfin;|iiota;|ii;|[Ii][Jj]lig;|[Ii]macr;|image;|ImaginaryI;|imagline;|imagpart;|imath;|imof;|imped;|Implies;|Im;|incare;|infintie;|infin;|inodot;|intcal;|integers;|Integral;|intercal;|Intersection;|intlarhk;|intprod;|[iI]nt;|InvisibleComma;|InvisibleTimes;|in;|[Ii][Oo]cy;|[Ii]ogon;|[Ii]opf;|[Ii]ota;|iprod;|iquest(?:;)?|[iI]scr;|isindot;|isinE;|isinsv;|isins;|isinv;|isin;|[Ii]tilde;|it;|[Ii]ukcy;|[Ii]uml(?:;)?|[Jj]circ;|[Jj]cy;|[Jj]fr;|jmath;|[Jj]opf;|[Jj]scr;|[Jj]sercy;|[Jj]ukcy;|kappav;|[Kk]appa;|[Kk]cedil;|[Kk]cy;|[Kk]fr;|kgreen;|[Kk][Hh]cy;|[Kk][Jj]cy;|[Kk]opf;|[Kk]scr;|lAarr;|[Ll]acute;|laemptyv;|lagran;|[Ll]ambda;|langd;|langle;|[lL]ang;|Laplacetrf;|lap;|laquo(?:;)?|larrbfs;|larrb;|larrfs;|larrhk;|larrlp;|larrpl;|larrsim;|larrtl;|[lL][aA]rr;|l[aA]tail;|lates;|late;|lat;|l[bB]arr;|lbbrk;|lbrace;|lbrack;|lbrke;|lbrksld;|lbrkslu;|[Ll]caron;|[Ll]cedil;|lceil;|lcub;|[Ll]cy;|ldca;|ldquor;|ldquo;|ldrdhar;|ldrushar;|ldsh;|LeftAngleBracket;|LeftArrowBar;|LeftArrowRightArrow;|leftarrowtail;|[lL]eft[aA]rrow;|LeftCeiling;|LeftDoubleBracket;|LeftDownTeeVector;|LeftDownVectorBar;|LeftDownVector;|LeftFloor;|leftharpoondown;|leftharpoonup;|leftleftarrows;|leftrightarrows;|[lL]eft[rR]ight[aA]rrow;|leftrightharpoons;|leftrightsquigarrow;|LeftRightVector;|LeftTeeArrow;|LeftTeeVector;|LeftTee;|leftthreetimes;|LeftTriangleBar;|LeftTriangleEqual;|LeftTriangle;|LeftUpDownVector;|LeftUpTeeVector;|LeftUpVectorBar;|LeftUpVector;|LeftVectorBar;|LeftVector;|l[Ee]g;|leqq;|leqslant;|leq;|lescc;|lesdotor;|lesdoto;|lesdot;|lesges;|lesg;|lessapprox;|lessdot;|lesseqgtr;|lesseqqgtr;|LessEqualGreater;|LessFullEqual;|LessGreater;|lessgtr;|LessLess;|lesssim;|LessSlantEqual;|LessTilde;|les;|l[eE];|lfisht;|lfloor;|[Ll]fr;|lgE;|lg;|lhard;|lharul;|lharu;|lHar;|lhblk;|[Ll][Jj]cy;|llarr;|llcorner;|Lleftarrow;|llhard;|lltri;|[lL]l;|[Ll]midot;|lmoustache;|lmoust;|lnapprox;|lnap;|lneqq;|lneq;|ln[eE];|lnsim;|loang;|loarr;|lobrk;|[lL]ong[lL]eft[aA]rrow;|[lL]ong[lL]eft[rR]ight[aA]rrow;|longmapsto;|[lL]ong[rR]ight[aA]rrow;|looparrowleft;|looparrowright;|lopar;|[Ll]opf;|loplus;|lotimes;|lowast;|lowbar;|LowerLeftArrow;|LowerRightArrow;|lozenge;|lozf;|loz;|lparlt;|lpar;|lrarr;|lrcorner;|lrhard;|lrhar;|lrm;|lrtri;|lsaquo;|[lL]scr;|[lL]sh;|lsime;|lsimg;|lsim;|lsqb;|lsquor;|lsquo;|[Ll]strok;|ltcc;|ltcir;|ltdot;|lthree;|ltimes;|ltlarr;|ltquest;|ltrie;|ltrif;|ltri;|ltrPar;|[lL][tT](?:;)?|lurdshar;|luruhar;|lvertneqq;|lvnE;|macr(?:;)?|male;|maltese;|malt;|mapstodown;|mapstoleft;|mapstoup;|mapsto;|[Mm]ap;|marker;|mcomma;|[Mm]cy;|mdash;|mDDot;|measuredangle;|MediumSpace;|Mellintrf;|[Mm]fr;|mho;|micro(?:;)?|midast;|midcir;|middot(?:;)?|mid;|minusb;|minusdu;|minusd;|MinusPlus;|minus;|mlcp;|mldr;|mnplus;|models;|[Mm]opf;|mp;|[Mm]scr;|mstpos;|multimap;|mumap;|[mM]u;|nabla;|[nN]acute;|nang;|napE;|napid;|napos;|precnapprox;|succnapprox;|napprox;|approx;|nap;|naturals;|natural;|natur;|nbsp(?:;)?|nbumpe;|nbump;|ncap;|ap;|[nN]caron;|[nN]cedil;|ncongdot;|ncong;|ncup;|[nN]cy;|ndash;|nearhk;|nearrow;|ne[Aa]rr;|nedot;|NegativeMediumSpace;|NegativeThickSpace;|NegativeThinSpace;|NegativeVeryThinSpace;|nequiv;|nesear;|nesim;|NestedGreaterGreater;|NestedLessLess;|NewLine;|nexists;|nexist;|ne;|[nN]fr;|ngeqq;|ngeqslant;|ngeq;|nges;|ng[eE];|nGg;|ngsim;|ngtr;|nGtv;|n[gG]t;|nh[Aa]rr;|nhpar;|nisd;|nis;|[nN][jJ]cy;|nl[Aa]rr;|nldr;|n[Ll]eftarrow;|n[Ll]eftrightarrow;|nleqq;|nleqslant;|nleq;|nless;|nles;|nl[eE];|nLl;|nlsim;|nltrie;|nltri;|nLtv;|n[lL]t;|nmid;|NoBreak;|NonBreakingSpace;|[Nn]opf;|NotCongruent;|NotCupCap;|NotDoubleVerticalBar;|NotElement;|NotEqualTilde;|NotEqual;|NotExists;|NotGreaterEqual;|NotGreaterFullEqual;|NotGreaterGreater;|NotGreaterLess;|NotGreaterSlantEqual;|NotGreaterTilde;|NotGreater;|NotHumpDownHump;|NotHumpEqual;|notindot;|notinE;|notinva;|notinvb;|notinvc;|notin;|NotLeftTriangleBar;|NotLeftTriangleEqual;|NotLeftTriangle;|NotLessEqual;|NotLessGreater;|NotLessLess;|NotLessSlantEqual;|NotLessTilde;|NotLess;|NotNestedGreaterGreater;|NotNestedLessLess;|notniva;|notnivb;|notnivc;|niv;|notni;|ni;|NotPrecedesEqual;|NotPrecedesSlantEqual;|NotPrecedes;|NotReverseElement;|NotRightTriangleBar;|NotRightTriangleEqual;|NotRightTriangle;|NotSquareSubsetEqual;|NotSquareSubset;|NotSquareSupersetEqual;|NotSquareSuperset;|NotSubsetEqual;|NotSubset;|NotSucceedsEqual;|NotSucceedsSlantEqual;|NotSucceedsTilde;|NotSucceeds;|NotSupersetEqual;|NotSuperset;|NotTildeEqual;|NotTildeFullEqual;|NotTildeTilde;|NotTilde;|NotVerticalBar;|[nN]ot(?:;)?|nparallel;|nparsl;|npart;|npar;|npolint;|nprcue;|npreceq;|nprec;|npre;|npr;|nrarrc;|nrarrw;|nr[Aa]rr;|n[Rr]ightarrow;|nrtrie;|nrtri;|nsccue;|nsce;|[nN]scr;|nsc;|nshortmid;|nshortparallel;|nsimeq;|nsime;|nsim;|nsmid;|nspar;|nsqsube;|nsqsupe;|nsub[eE];|nsubseteqq;|nsubseteq;|nsubset;|nsub;|nsucceq;|nsucc;|nsup[eE];|nsupseteqq;|nsupseteq;|nsupset;|nsup;|ntgl;|[nN]tilde(?:;)?|ntlg;|ntrianglelefteq;|ntriangleleft;|ntrianglerighteq;|ntriangleright;|numero;|numsp;|num;|[nN]u;|nvap;|n[Vv][Dd]ash;|nvge;|nvgt;|nvHarr;|nvinfin;|nvlArr;|nvle;|nvltrie;|nvlt;|nvrArr;|nvrtrie;|nvsim;|nwarhk;|nwarrow;|nw[Aa]rr;|nwnear;|[oO]acute(?:;)?|oast;|[oO]circ(?:;)?|ocir;|[oO]cy;|odash;|[oO]dblac;|odiv;|odot;|odsold;|[oO][eE]lig;|ofcir;|[oO]fr;|ogon;|[oO]grave(?:;)?|ogt;|ohbar;|ohm;|oint;|olarr;|olcir;|olcross;|oline;|olt;|[oO]macr;|[oO]mega;|[oO]micron;|omid;|ominus;|[oO]opf;|opar;|OpenCurlyDoubleQuote;|OpenCurlyQuote;|operp;|oplus;|orarr;|orderof;|order;|ordf(?:;)?|ordm(?:;)?|ord;|origof;|oror;|orslope;|orv;|[oO]scr;|[oO]slash(?:;)?|osol;|oS;|[oO]tilde(?:;)?|otimesas;|[oO]times;|[oO]uml(?:;)?|ovbar;|OverBar;|OverBrace;|OverBracket;|OverParenthesis;|parallel;|para(?:;)?|parsim;|parsl;|PartialD;|part;|par;|[pP]cy;|percnt;|period;|permil;|perp;|pertenk;|[pP]fr;|phiv;|[pP]hi;|phmmat;|phone;|pitchfork;|piv;|[pP]i;|planckh;|planck;|plankv;|plusacir;|plusb;|pluscir;|plusdo;|plusdu;|pluse;|PlusMinus;|plusmn(?:;)?|plussim;|plustwo;|plus;|pm;|Poincareplane;|pointint;|[Pp]opf;|pound(?:;)?|prap;|prcue;|precapprox;|preccurlyeq;|PrecedesEqual;|PrecedesSlantEqual;|PrecedesTilde;|Precedes;|preceq;|precneqq;|precnsim;|precsim;|prec;|pr[Ee];|primes;|[Pp]rime;|prnap;|prnE;|prnsim;|Product;|prod;|profalar;|profline;|profsurf;|Proportional;|Proportion;|propto;|prop;|prsim;|prurel;|[pP]r;|[pP]scr;|[pP]si;|puncsp;|[qQ]fr;|qint;|[Qq]opf;|qprime;|[qQ]scr;|quaternions;|quatint;|questeq;|quest;|[Qq][Uu][Oo][Tt](?:;)?|rAarr;|race;|[rR]acute;|radic;|raemptyv;|rangd;|range;|rangle;|[Rr]ang;|raquo(?:;)?|rarrap;|rarrbfs;|rarrb;|rarrc;|rarrfs;|rarrhk;|rarrlp;|rarrpl;|rarrsim;|[rR]arrtl;|rarrw;|[rR][Aa]rr;|r[Aa]tail;|rationals;|ratio;|[Rr][Bb]arr;|rbbrk;|rbrace;|rbrack;|rbrke;|rbrksld;|rbrkslu;|[rR]caron;|[rR]cedil;|rceil;|rcub;|[rR]cy;|rdca;|rdldhar;|rdquor;|rdquo;|rdsh;|realine;|realpart;|reals;|real;|rect;|[Rr][Ee][Gg](?:;)?|ReverseElement;|ReverseEquilibrium;|ReverseUpEquilibrium;|Re;|rfisht;|rfloor;|[Rr]fr;|rhard;|rharul;|rharu;|rHar;|rhov;|[rR]ho;|RightAngleBracket;|RightArrowBar;|RightArrowLeftArrow;|rightarrowtail;|[Rr]ight[aA]rrow;|RightCeiling;|RightDoubleBracket;|RightDownTeeVector;|RightDownVectorBar;|RightDownVector;|RightFloor;|rightharpoondown;|rightharpoonup;|rightleftarrows;|rightleftharpoons;|rightrightarrows;|rightsquigarrow;|RightTeeArrow;|RightTeeVector;|RightTee;|rightthreetimes;|RightTriangleBar;|RightTriangleEqual;|RightTriangle;|RightUpDownVector;|RightUpTeeVector;|RightUpVectorBar;|RightUpVector;|RightVectorBar;|RightVector;|[oO]r;|ring;|risingdotseq;|rlarr;|rlhar;|rlm;|rmoustache;|rmoust;|rnmid;|roang;|roarr;|robrk;|ropar;|[Rr]opf;|roplus;|rotimes;|RoundImplies;|rpargt;|rpar;|rppolint;|rrarr;|Rrightarrow;|rsaquo;|[Rr]scr;|[Rr]sh;|rsqb;|rsquor;|rsquo;|rthree;|rtimes;|rtrie;|rtrif;|rtriltri;|rtri;|RuleDelayed;|ruluhar;|rx;|[sS]acute;|sbquo;|scap;|[sS]caron;|sccue;|[sS]cedil;|sc[Ee];|[sS]circ;|scnap;|scnE;|scnsim;|scpolint;|scsim;|[sS]cy;|[sS]c;|sdotb;|sdote;|sdot;|searhk;|searrow;|se[Aa]rr;|sect(?:;)?|semi;|seswar;|setminus;|setmn;|sext;|sfrown;|[sS]fr;|sharp;|[sS][hH][cC][hH]cy;|[sS][hH]cy;|ShortDownArrow;|ShortLeftArrow;|shortmid;|shortparallel;|ShortRightArrow;|ShortUpArrow;|shy(?:;)?|sigmaf;|sigmav;|[sS]igma;|simdot;|simeq;|sime;|simgE;|simg;|simlE;|siml;|simne;|simplus;|simrarr;|sim;|slarr;|SmallCircle;|smallsetminus;|smashp;|smeparsl;|smid;|smile;|smtes;|smte;|smt;|[sS][oO][fF][tT]cy;|solbar;|solb;|sol;|[sS]opf;|spadesuit;|spades;|spar;|sqcaps;|sqcap;|sqcups;|sqcup;|Sqrt;|sqsube;|sqsubseteq;|sqsubset;|sqsub;|sqsupe;|sqsupseteq;|sqsupset;|sqsup;|SquareIntersection;|SquareSubsetEqual;|SquareSubset;|SquareSupersetEqual;|SquareSuperset;|SquareUnion;|[Ss]quare;|squarf;|squf;|squ;|srarr;|[sS]scr;|ssetmn;|ssmile;|sstarf;|starf;|[sS]tar;|straightepsilon;|straightphi;|strns;|subdot;|subedot;|sub[eE];|submult;|subn[eE];|subplus;|subrarr;|subseteqq;|SubsetEqual;|subseteq;|subsetneqq;|subsetneq;|[Ss]ubset;|subsim;|subsub;|subsup;|[Ss]ub;|succapprox;|succcurlyeq;|SucceedsEqual;|SucceedsSlantEqual;|SucceedsTilde;|Succeeds;|succeq;|succneqq;|succnsim;|succsim;|succ;|SuchThat;|[Ss]um;|sung;|sup1(?:;)?|sup2(?:;)?|sup3(?:;)?|supdot;|supdsub;|supedot;|SupersetEqual;|Superset;|sup[eE];|suphsol;|suphsub;|suplarr;|supmult;|supn[eE];|supplus;|supseteqq;|supseteq;|supsetneqq;|supsetneq;|[Ss]upset;|supsim;|supsub;|supsup;|[Ss]up;|swarhk;|swarrow;|sw[Aa]rr;|swnwar;|szlig(?:;)?|Tab;|target;|[tT]au;|tbrk;|[tT]caron;|[tT]cedil;|[tT]cy;|tdot;|telrec;|[tT]fr;|there4;|[Tt]herefore;|thetasym;|thetav;|[tT]heta;|thickapprox;|thicksim;|ThickSpace;|ThinSpace;|thinsp;|thkap;|thksim;|[tT][hH][oO][rR][nN](?:;)?|TildeEqual;|TildeFullEqual;|TildeTilde;|timesbar;|timesb;|timesd;|times(?:;)?|tint;|toea;|topbot;|topcir;|topfork;|[tT]opf;|top;|tosa;|tprime;|[Tt][Rr][Aa][Dd][Ee];|triangledown;|trianglelefteq;|triangleleft;|triangleq;|trianglerighteq;|triangleright;|triangle;|tridot;|trie;|triminus;|TripleDot;|triplus;|trisb;|tritime;|trpezium;|[tT]scr;|[tT][sS]cy;|[tT][sS][hH]cy;|[tT]strok;|twixt;|twoheadleftarrow;|twoheadrightarrow;|[uU]acute(?:;)?|Uarrocir;|[uU][Aa]rr;|[uU]brcy;|[uU]breve;|[uU]circ(?:;)?|[uU]cy;|udarr;|[uU]dblac;|udhar;|ufisht;|[uU]fr;|[uU]grave(?:;)?|uharl;|uharr;|uHar;|uhblk;|ulcorner;|ulcorn;|ulcrop;|ultri;|[uU]macr;|uml(?:;)?|UnderBar;|UnderBrace;|UnderBracket;|UnderParenthesis;|UnionPlus;|Union;|[uU]ogon;|[uU]opf;|UpArrowBar;|UpArrowDownArrow;|[Uu]p[aA]rrow;|[Uu]p[dD]own[aA]rrow;|UpEquilibrium;|upharpoonleft;|upharpoonright;|uplus;|UpperLeftArrow;|UpperRightArrow;|upsih;|[uU]psilon;|[Uu]psi;|UpTeeArrow;|UpTee;|upuparrows;|urcorner;|urcorn;|urcrop;|[uU]ring;|urtri;|[uU]scr;|utdot;|[uU]tilde;|[Tt]ilde;|utrif;|utri;|uuarr;|[uU]uml(?:;)?|uwangle;|vangrt;|varepsilon;|varkappa;|varnothing;|varphi;|varpi;|varpropto;|varrho;|v[Aa]rr;|varsigma;|varsubsetneqq;|varsubsetneq;|varsupsetneqq;|varsupsetneq;|vartheta;|vartriangleleft;|vartriangleright;|vBarv;|[Vv][bB]ar;|[vV]cy;|Vdashl;|[Vv][Dd]ash;|veebar;|veeeq;|[Vv]ee;|vellip;|[Vv]erbar;|VerticalBar;|VerticalLine;|VerticalSeparator;|VerticalTilde;|[Vv]ert;|VeryThinSpace;|[vV]fr;|vltri;|vnsub;|vnsup;|[vV]opf;|vprop;|vrtri;|[vV]scr;|vsubn[eE];|vsupn[eE];|Vvdash;|vzigzag;|[wW]circ;|wedbar;|wedgeq;|xwedge;|[Ww]edge;|weierp;|[wW]fr;|[wW]opf;|wp;|wreath;|wr;|[wW]scr;|xcap;|xcirc;|xcup;|xdtri;|[xX]fr;|xh[Aa]rr;|[xX]i;|xl[Aa]rr;|xmap;|xnis;|xodot;|[xX]opf;|xoplus;|xotime;|xr[Aa]rr;|[xX]scr;|xsqcup;|xuplus;|xutri;|xvee;|[yY]acute(?:;)?|[yY][aA]cy;|[yY]circ;|[yY]cy;|yen(?:;)?|[yY]fr;|[yY][iI]cy;|[yY]opf;|[yY]scr;|[yY][uU]cy;|[Yy]uml(?:;)?|[zZ]acute;|[zZ]caron;|[zZ]cy;|[zZ]dot;|zeetrf;|ZeroWidthSpace;|[zZ]eta;|[Zz]fr;|[zZ][hH]cy;|zigrarr;|[Zz]opf;|[zZ]scr;|zwj;|zwnj;)/', $sequence, $matches)) {
$sequence = $matches[0];
$lastChar = substr($sequence, -1);
# If the character reference is being consumed as part of an attribute, and the
# last character matched is not a U+003B SEMICOLON character (;), and the next
# character is either a U+003D EQUALS SIGN character (=) or an alphanumeric ASCII
# character, then, for historical reasons, all the characters that were matched
# after the U+0026 AMPERSAND character (&) must be unconsumed, and nothing is
# returned. However, if this next character is in fact a U+003D EQUALS SIGN
# character (=), then this is a parse error, because some legacy user agents will
# misinterpret the markup in those cases.
// OPTIMIZATION: Not consuming here until this stuff is checked because there's no
// sense in consuming characters and then turning right back around and unconsuming
// them. Will consume after this step instead.
$next = $this->peek();
if ($inAttribute && $lastChar !== ';' && ($next === '=' || ctype_alnum($next))) {
if ($next === '=') {
$this->error(ParseError::ENTITY_UNEXPECTED_CHARACTER, $next, 'semicolon terminator');
}
// Consume the ampersand.
$this->consume();
return '&';
}
// Add 1 to the string length because the & isn't included in the matched
// sequence.
$this->consume(strlen($sequence) + 1);
if ($lastChar !== ';') {
// Used for PHP's entity decoder. Described below.
$sequence.=';';
$this->error(ParseError::ENTITY_UNEXPECTED_CHARACTER, $lastChar, 'semicolon terminator');
}
# Return one or two character tokens for the character(s) corresponding to the
# character reference name (as given by the second column of the named character
# references table).
// DEVIATION: Since the regular expression above checks the validity of the
// regular expression there isn't a need for the table. Can use PHP's built in
// decoder at least until there's entities in the table that aren't in the spec's.
return html_entity_decode('&'.$sequence, ENT_HTML5);
}
# If no match can be made, then no characters are consumed, and nothing is
# returned. In this case, if the characters after the U+0026 AMPERSAND character
# (&) consist of a sequence of one or more alphanumeric ASCII characters followed
# by a U+003B SEMICOLON character (;), then this is a parse error.
if (preg_match('/^[A-Za-z0-9]+;/', $char)) {
$this->error(ParseError::INVALID_NAMED_ENTITY, $char);
}
// Consume the ampersand.
$this->consume();
return '&';
}
protected function span(string $match, bool $while = true, bool $advancePointer = true, int $limit = 0): string {
// Break the matching characters into an array of characters. Unicode friendly.
$match = preg_split('/(?<!^)(?!$)/Su', $match);
$count = 0;
$string = '';
while (true) {
$char = $this->data->nextChar();
if ($char === '') {
break;
}
$inArray = in_array($char, $match);
// strspn
if ($while && !$inArray) {
break;
}
// strcspn
elseif (!$while && $inArray) {
break;
}
if ($advancePointer) {
if ($char === "\n") {
$this->newlines[] = $this->data->posChar();
$this->_column = 1;
$this->_line++;
} else {
$this->_column++;
}
}
$string .= $char;
$count++;
if ($count === $limit) {
break;
}
}
// If the end is reached the pointer isn't moved when the last character
// is checked, so it only needs to be moved backwards if not wanting the
// pointer to move.
if ($char === '') {
if (!$advancePointer) {
$this->data->seek(0 - $count - 1);
}
} else {
$this->data->seek(($advancePointer) ? -1 : 0 - $count - 2);
}
if (self::$debug) {
echo ($advancePointer) ? "\nconsume" : "\npeek";
echo ($while) ? 'While' : 'Until';
echo "\n==========\nPattern: ";
var_export(str_replace(["\t", "\n", "\x0c", "\x0d"], ['\t', '\n', '\x0c', '\x0d'], implode('', $match)));
echo "\nData: ";
var_export($string);
echo "\nPointer: {$this->data->posChar()}\n==========\n\n";
}
return $string;
}
public function __get($property) {
switch ($property) {
case 'column': return $this->_column;
break;
case 'line': return $this->_line;
break;
case 'pointer': return $this->data->posChar();
break;
default: return null;
}
}
}