Fix character reference parsing

This commit is contained in:
Dustin Wilson 2018-08-31 13:25:05 -05:00
parent 33363ab2d3
commit 66ec4dab27
2 changed files with 6 additions and 5 deletions

View file

@@ -358,7 +358,7 @@ class Data
# Otherwise, return a character token for the Unicode character whose code point
# is that number.
return \MensBeam\Intl\Encoding\UTF8::encode($number);
}
}
# Consume the maximum number of characters possible, with the consumed characters
# matching one of the identifiers in the first column of the named character
@@ -399,7 +399,9 @@ class Data
return '&';
}
$this->consume(strlen($sequence));
// Add 1 to the string length because the & isn't included in the matched
// sequence.
$this->consume(strlen($sequence) + 1);
if ($lastChar !== ';') {
// Used for PHP's entity decoder. Described below.

View file

@@ -39,12 +39,11 @@ class TreeBuilder {
// Used to store the template insertion modes
protected $templateInsertionModes;
// Used for debugging to print out information as the tree is built.
public static $debug = false;
// Instance used with the static token insertion methods.
protected static $instance;
// Used for debugging to print out information as the tree is built.
protected static $debug = false;
// Constants used for insertion modes
const INITIAL_MODE = 0;