|
|
@ -102,62 +102,31 @@ class UTF8 implements StatelessEncoding { |
|
|
|
return $bytes; |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Advance $distance characters through the string
 *
 * If $distance is negative, the operation will be performed in reverse.
 *
 * If the end (or beginning) of the string is reached before the requested
 * number of characters has been skipped, the number of characters which
 * could NOT be skipped is returned; otherwise 0 is returned.
 *
 * @param int $distance Number of characters to skip; negative values seek backward, zero is a no-op
 * @return int The remaining (unsatisfied) number of characters, always non-negative
 */
public function seek(int $distance): int {
    if ($distance > 0) {
        $end = strlen($this->string);
        // Only count a character as skipped once it has actually been read:
        // testing for the end of input BEFORE decrementing ensures a failed
        // read at EOF is not subtracted from the remainder
        while ($distance > 0 && $this->posByte < $end) {
            // get the next code point; this automatically increments the character position
            if ($this->nextCode() === false) {
                // nextCode() signals EOF with false; nothing was consumed
                break;
            }
            $distance--;
        }
        return $distance;
    }
    if ($distance < 0) {
        $distance = abs($distance);
        if (!$this->posByte) {
            // if we're already at the start of the string, we can't go further back
            return $distance;
        }
        // Temporarily switch the error mode while scanning backward
        // NOTE(review): presumably MODE_NULL keeps invalid sequences from raising during sync() - confirm
        $mode = $this->errMode;
        $this->errMode = self::MODE_NULL;
        try {
            do {
                // move to the start of the character preceding the current byte position
                $this->sync($this->posByte - 1);
                // manually decrement the character position
                $this->posChar--;
            } while (--$distance && $this->posByte);
        } finally {
            // always restore the caller's error mode, even if the scan throws
            $this->errMode = $mode;
        }
        return $distance;
    }
    // a distance of zero requests no movement
    return 0;
}
|
|
|
|
|
|
|
/** Synchronize to the byte offset of the start of the nearest character at or before byte offset $pos */ |
|
|
|
protected function sync(int $pos) { |
|
|
|
$b = ord(@$this->string[$pos]); |
|
|
|
if ($b < 0x80) { |
|
|
|
// if the byte is an ASCII byte or the end of input, then this is already a synchronized position |
|
|
|
$this->posByte = $pos; |
|
|
|
} else { |
|
|
|
$s = $pos; |
|
|
|
while ($b >= 0x80 && $b <= 0xBF && $pos > 0 && ($s - $pos) < 3) { // go back at most three bytes, no further than the start of the string, and only as long as the byte remains a continuation byte |
|
|
|
$b = ord(@$this->string[--$pos]); |
|
|
|
} |
|
|
|
$this->posByte = $pos; |
|
|
|
// decrement the character position because nextCode() increments it |
|
|
|
/** Implements backward seeking $distance characters */ |
|
|
|
protected function seekBack(int $distance): int { |
|
|
|
while ($distance > 0 && $this->posByte > 0) { |
|
|
|
$distance--; |
|
|
|
$this->posChar--; |
|
|
|
if (is_null($this->nextCode())) { |
|
|
|
$this->posByte = $s; |
|
|
|
$pos = $this->posByte - 1; |
|
|
|
$b = ord(@$this->string[$pos]); |
|
|
|
if ($b < 0x80) { |
|
|
|
// if the byte is an ASCII byte or the end of input, then this is already a synchronized position |
|
|
|
$this->posByte = $pos; |
|
|
|
} else { |
|
|
|
$this->posByte = ($this->posByte > $s) ? $pos : $s; |
|
|
|
$s = $pos; |
|
|
|
while ($b >= 0x80 && $b <= 0xBF && $pos > 0 && ($s - $pos) < 3) { // go back at most three bytes, no further than the start of the string, and only as long as the byte remains a continuation byte |
|
|
|
$b = ord(@$this->string[--$pos]); |
|
|
|
} |
|
|
|
$this->posByte = $pos; |
|
|
|
// decrement the character position because nextCode() increments it |
|
|
|
$this->posChar--; |
|
|
|
if (is_null($this->nextCode())) { |
|
|
|
$this->posByte = $s; |
|
|
|
} else { |
|
|
|
$this->posByte = ($this->posByte > $s) ? $pos : $s; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
return $distance; |
|
|
|
} |
|
|
|
} |
|
|
|