Browse Source

Minor simplifications

split-manual
J. King 3 years ago
parent
commit
8d5079deae
  1. 110
      lib/Data.php
  2. 41
      lib/TreeBuilder.php

110
lib/Data.php

@ -85,69 +85,62 @@ class Data {
}
// unless we're peeking, track line and column position, and whether we've hit EOF
if ($this->track) {
$this->checkChar($char);
}
return $char;
}
protected function checkChar(string $char): bool {
// track line and column number, and EOF
if ($char === "\n") {
$this->newlines[$this->data->posChar()] = $this->_column;
$this->_column = 0;
$this->_line++;
} elseif ($char === '') {
$this->eof = true;
return false;
} else {
$this->_column++;
$len = strlen($char);
$here = $this->data->posChar();
if ($this->lastError < $here) {
// look for erroneous characters
if ($len === 1) {
$ord = ord($char);
if (($ord < 0x20 && !in_array($ord, [0x0, 0x9, 0xA, 0xC])) || $ord === 0x7F) {
$this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
}
} elseif ($len === 2) {
if (ord($char[0]) == 0xC2) {
$ord = ord($char[1]);
if ($ord >= 0x80 && $ord <= 0x9F) {
if ($char === "\n") {
$this->newlines[$this->data->posChar()] = $this->_column;
$this->_column = 0;
$this->_line++;
} elseif ($char === '') {
$this->eof = true;
} else {
$this->_column++;
$len = strlen($char);
$here = $this->data->posChar();
if ($this->lastError < $here) {
// look for erroneous characters
if ($len === 1) {
$ord = ord($char);
if (($ord < 0x20 && !in_array($ord, [0x0, 0x9, 0xA, 0xC])) || $ord === 0x7F) {
$this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
}
}
} elseif ($len === 3) {
$head = ord($char[0]);
if ($head === 0xED) {
$tail = (ord($char[1]) << 8) + ord($char[2]);
if ($tail >= 0xA080 && $tail <= 0xBFBF) {
$this->error(ParseError::SURROGATE_IN_INPUT_STREAM);
$this->lastError = $here;
} elseif ($len === 2) {
if (ord($char[0]) == 0xC2) {
$ord = ord($char[1]);
if ($ord >= 0x80 && $ord <= 0x9F) {
$this->error(ParseError::CONTROL_CHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
}
}
} elseif ($head === 0xEF) {
$tail = (ord($char[1]) << 8) + ord($char[2]);
if (($tail >= 0xB790 && $tail <= 0xB7AF) || $tail >= 0xBFBE) {
} elseif ($len === 3) {
$head = ord($char[0]);
if ($head === 0xED) {
$tail = (ord($char[1]) << 8) + ord($char[2]);
if ($tail >= 0xA080 && $tail <= 0xBFBF) {
$this->error(ParseError::SURROGATE_IN_INPUT_STREAM);
$this->lastError = $here;
}
} elseif ($head === 0xEF) {
$tail = (ord($char[1]) << 8) + ord($char[2]);
if (($tail >= 0xB790 && $tail <= 0xB7AF) || $tail >= 0xBFBE) {
$this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
} elseif ($tail === 0xBFBD && $this->data->posErr === $here) {
$this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM, $this->data->posByte);
$this->lastError = $here;
}
}
} elseif ($len === 4) {
$tail = (ord($char[2]) << 8) + ord($char[3]);
if ($tail >= 0xBFBE) {
$this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
} elseif ($tail === 0xBFBD && $this->data->posErr === $here) {
$this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM, $this->data->posByte);
$this->lastError = $here;
}
$this->astrals[$here] = true;
}
} elseif ($len === 4) {
$tail = (ord($char[2]) << 8) + ord($char[3]);
if ($tail >= 0xBFBE) {
$this->error(ParseError::NONCHARACTER_IN_INPUT_STREAM);
$this->lastError = $here;
}
$this->astrals[$here] = true;
}
}
}
return true;
return $char;
}
public function unconsume(int $length = 1, bool $retreatPointer = true): void {
@ -194,20 +187,17 @@ class Data {
if ($this->track) {
// control characters produce parse errors
$match .= "\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F";
}
$out = $this->data->asciiSpanNot($match."\r\n", $limit);
if ($this->track) {
$out = $this->data->asciiSpanNot($match."\r\n", $limit);
$this->_column += ($this->data->posChar() - $start);
return $out;
} else {
return $this->data->asciiSpanNot($match."\r\n", $limit);
}
return $out;
}
public function peek(int $length = 1): string {
assert($length > 0, new Exception(Exception::DATA_INVALID_DATA_CONSUMPTION_LENGTH, $length));
$string = $this->data->peekChar($length);
return $string;
return $this->data->peekChar($length);
}
/** Returns an indexed array with the line and column positions of the requested offset from the current position */

41
lib/TreeBuilder.php

@ -3876,7 +3876,7 @@ class TreeBuilder {
];
}
public function insertCharacterToken(CharacterToken $token) {
public function insertCharacterToken(CharacterToken $token): void {
# 1. Let data be the characters passed to the algorithm, or, if no characters
# were explicitly specified, the character of the character token being
# processed.
@ -3890,7 +3890,8 @@ class TreeBuilder {
assert($adjustedInsertionLocation instanceof \DOMNode, new Exception(Exception::TREEBUILDER_INVALID_INSERTION_LOCATION));
# 3. If the adjusted insertion location is in a Document node, then abort these
# steps.
if ((($insertBefore === false) ? $adjustedInsertionLocation : $adjustedInsertionLocation->parentNode) instanceof \DOMDocument) {
// NOTE: foster parenting will never point to before the root element
if ($adjustedInsertionLocation instanceof \DOMDocument) {
return;
}
@ -3915,42 +3916,23 @@ class TreeBuilder {
}
}
public function insertCommentToken(CommentToken $token, \DOMNode $position = null) {
public function insertCommentToken(CommentToken $token, \DOMNode $position = null): void {
# When the steps below require the user agent to insert a comment while
# processing a comment token, optionally with an explicitly insertion position
# position, the user agent must run the following steps:
# 1. Let data be the data given in the comment token being processed.
// Already provided through the token object.
# 2. If position was specified, then let the adjusted insertion location be
# position. Otherwise, let adjusted insertion location be the appropriate place
# for inserting a node.
if (!is_null($position)) {
$adjustedInsertionLocation = $position;
$insertBefore = false;
} else {
$location = $this->appropriatePlaceForInsertingNode();
$adjustedInsertionLocation = $location['node'];
$insertBefore = $location['insert before'];
}
// OPTIMIZATION: Comments are never foster-parented
$position = $position ?? $this->appropriatePlaceForInsertingNode()['node'];
# 3. Create a Comment node whose data attribute is set to data and whose node
# document is the same as that of the node in which the adjusted insertion
# location finds itself.
if ($adjustedInsertionLocation instanceof \DOMDocument) {
$nodeDocument = $adjustedInsertionLocation;
} else {
$nodeDocument = $adjustedInsertionLocation->ownerDocument;
}
$commentNode = $nodeDocument->createComment($token->data);
# 4. Insert the newly created node at the adjusted insertion location.
if ($insertBefore === false) {
$adjustedInsertionLocation->appendChild($commentNode);
} else {
$adjustedInsertionLocation->parentNode->insertBefore($commentNode, $adjustedInsertionLocation);
}
$position->appendChild($this->DOM->createComment($token->data));
}
public function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): Element {
@ -3963,8 +3945,6 @@ class TreeBuilder {
# Let the adjusted insertion location be the appropriate place for inserting
# a node.
$location = $this->appropriatePlaceForInsertingNode($intendedParent);
$adjustedInsertionLocation = $location['node'];
$insertBefore = $location['insert before'];
# Let element be the result of creating an element for the token in the given
# namespace, with the intended parent being the element in which the adjusted
# insertion location finds itself.
@ -3974,12 +3954,11 @@ class TreeBuilder {
# - 1. Push a new element queue onto the custom element reactions stack.
// DEVIATION: Unnecessary because there is no scripting in this implementation.
# - 2. Insert element at the adjusted insertion location.
if ($insertBefore === false) {
$adjustedInsertionLocation->appendChild($element);
if ($location['insert before'] === false) {
$location['node']->appendChild($element);
} else {
$adjustedInsertionLocation->parentNode->insertBefore($element, $adjustedInsertionLocation);
$location['node']->parentNode->insertBefore($element, $location['node']);
}
# - 3. Pop the element queue from the custom element reactions stack, and
# invoke custom element reactions in that queue.
// DEVIATION: Unnecessary because there is no scripting in this implementation.

Loading…
Cancel
Save