Browse Source

Move dirty EOF handling to UTF-16

It remains useful for this encoding, which is the other self-synchronizing
multi-byte encoding
J. King 4 years ago
parent
commit
be034a08e0
  1. 9
      lib/Encoding/AbstractEncoding.php
  2. 7
      lib/Encoding/UTF16.php

9
lib/Encoding/AbstractEncoding.php

@ -17,8 +17,6 @@ abstract class AbstractEncoding implements Encoding {
protected $lenByte = null;
/** @var int $lenChar The length of the string in characters, if known */
protected $lenChar = null;
/** To be removed */
protected $dirtyEOF = 0;
/** @var array $errStack A list of error data to aid in backwards seeking; the most recent error is kept off the stack */
protected $errStack = [];
/** @var int $errMark The byte position marking the most recent error. The one or more bytes previous to this position constitute an invalid character */
@ -89,13 +87,6 @@ abstract class AbstractEncoding implements Encoding {
if (!$this->posChar) {
return $distance;
}
if ($this->dirtyEOF > 0) {
// if we are at the end of the string and it did not terminate cleanly, go back the correct number of dirty bytes to seek through the last character
$this->posByte -= $this->dirtyEOF;
$this->dirtyEOF = 0;
$distance--;
$this->posChar--;
}
$mode = $this->errMode;
$this->errMode = self::MODE_NULL;
$out = $this->seekBack($distance);

7
lib/Encoding/UTF16.php

@ -8,6 +8,7 @@ namespace MensBeam\Intl\Encoding;
abstract class UTF16 extends AbstractEncoding {
protected $selfSynchronizing = true;
protected $dirtyEOF = 0;
public function nextCode() {
$lead_b = null;
@ -80,6 +81,12 @@ abstract class UTF16 extends AbstractEncoding {
/** Implements backward seeking $distance characters */
protected function seekBack(int $distance): int {
if ($this->dirtyEOF && $distance) {
$distance--;
$this->posChar--;
$this->posByte -= $this->dirtyEOF;
$this->dirtyEOF = 0;
}
while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;

Loading…
Cancel
Save