Browse Source

Another infinite loop in Tokenizer caused by Data

split-manual
Dustin Wilson 5 years ago
parent
commit
a0c3883363
  1. 22
      lib/Data.php
  2. 7
      lib/ParseError.php
  3. 9
      lib/Tokenizer.php

22
lib/Data.php

@@ -100,19 +100,21 @@ class Data {
public function unconsume(int $length = 1) {
assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
$this->data->seek(0 - $length);
if ($this->data->peekChar($length) !== '') {
$this->data->seek(0 - $length);
$string = $this->data->peekChar($length);
$numOfNewlines = substr_count($string, "\n");
$string = $this->data->peekChar($length);
$numOfNewlines = substr_count($string, "\n");
if ($numOfNewlines > 0) {
$this->_line -= $numOfNewlines;
if ($numOfNewlines > 0) {
$this->_line -= $numOfNewlines;
$count = $this->newlines;
$index = count($this->newlines) - ($numOfNewlines - 1);
$this->_column = 1 + (($count > 0 && isset($this->newlines[$index])) ? $this->data->posChar() - $this->newlines[$index] : $this->data->posChar());
} else {
$this->_column -= $length;
$count = $this->newlines;
$index = count($this->newlines) - ($numOfNewlines - 1);
$this->_column = 1 + (($count > 0 && isset($this->newlines[$index])) ? $this->data->posChar() - $this->newlines[$index] : $this->data->posChar());
} else {
$this->_column -= $length;
}
}
if (self::$debug) {

7
lib/ParseError.php

@@ -58,8 +58,8 @@ class ParseError {
assert(count($arg) === $count, new Exception(Exception::INCORRECT_PARAMETERS_FOR_MESSAGE, $count));
if ($count > 0) {
// Convert newlines and tabs in the arguments to words to better express what they
// are.
// Convert newlines and tabs in the arguments to words to better
// express what they are.
$arg = array_map(function($value) {
if ($value === "\n") {
return 'Newline';
@@ -76,7 +76,8 @@ class ParseError {
$message = sprintf($message, ...$arg);
}
// Wrap with preamble and location
// TODO: the file path should be middle-elided when necessary so that the message does not exceed 1024 bytes
// TODO: the file path should be middle-elided when necessary so that
// the message does not exceed 1024 bytes
$message = sprintf("HTML5 Parse Error: \"%s\" in %s", $message, $file);
if ($line) {
$message .= sprintf(" on line %s, column %s", $line, $column);

9
lib/Tokenizer.php

@@ -366,14 +366,7 @@ class Tokenizer {
# U+003F QUESTION MARK (?)
elseif ($char === '?') {
# Parse error. Switch to the bogus comment state.
// Making errors more expressive.
if ($char !== '') {
$this->error(ParseError::TAG_NAME_EXPECTED);
} else {
$this->error(ParseError::UNEXPECTED_EOF);
}
$this->error(ParseError::TAG_NAME_EXPECTED);
$this->state = self::BOGUS_COMMENT_STATE;
}
# Anything else

Loading…
Cancel
Save