Browse Source

Make posErr fully generic

span
J. King 4 years ago
parent
commit
87e34b3074
  1. 3
      lib/Encoding/Big5.php
  2. 4
      lib/Encoding/EUCKR.php
  3. 7
      lib/Encoding/GBCommon.php
  4. 4
      lib/Encoding/GenericEncoding.php
  5. 2
      lib/Encoding/UTF16.php
  6. 2
      lib/Encoding/UTF8.php

3
lib/Encoding/Big5.php

@@ -62,10 +62,8 @@ class Big5 implements StatelessEncoding {
return $code; return $code;
} else { } else {
if ($b < 0x80) { if ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1); return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1);
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2); return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2);
} }
} }
@@ -79,7 +77,6 @@ class Big5 implements StatelessEncoding {
} else { } else {
// dirty EOF // dirty EOF
$this->dirtyEOF = 1; $this->dirtyEOF = 1;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
} }
} }

4
lib/Encoding/EUCKR.php

@@ -33,7 +33,6 @@ class EUCKR implements StatelessEncoding {
if ($b < 0x80) { if ($b < 0x80) {
return $b; return $b;
} elseif ($b == 0x80 || $b == 0xFF) { } elseif ($b == 0x80 || $b == 0xFF) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 1);
} else { } else {
$lead = $b; $lead = $b;
@@ -49,10 +48,8 @@ class EUCKR implements StatelessEncoding {
return $code; return $code;
} else { } else {
if ($b < 0x80) { if ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1); return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1);
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2); return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2);
} }
} }
@@ -66,7 +63,6 @@ class EUCKR implements StatelessEncoding {
} else { } else {
// dirty EOF // dirty EOF
$this->dirtyEOF = 1; $this->dirtyEOF = 1;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
} }
} }

7
lib/Encoding/GBCommon.php

@@ -29,7 +29,6 @@ abstract class GBCommon implements StatelessEncoding {
$first = $b; $first = $b;
continue; continue;
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
} }
} elseif ($second === 0) { } elseif ($second === 0) {
@@ -42,10 +41,8 @@ abstract class GBCommon implements StatelessEncoding {
$pointer = ($first - 0x81) * 190 + ($b - $offset); $pointer = ($first - 0x81) * 190 + ($b - $offset);
return self::TABLE_GBK[$pointer]; return self::TABLE_GBK[$pointer];
} elseif ($b < 0x80) { } elseif ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte); return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte);
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
} }
} }
@@ -55,7 +52,6 @@ abstract class GBCommon implements StatelessEncoding {
continue; continue;
} else { } else {
$this->posByte -= 2; $this->posByte -= 2;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
} }
} else { } else {
@@ -75,12 +71,10 @@ abstract class GBCommon implements StatelessEncoding {
if (isset($codePointOffset)) { if (isset($codePointOffset)) {
return $codePointOffset + $pointer - $offset; return $codePointOffset + $pointer - $offset;
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
} }
} else { } else {
$this->posByte -= 3; $this->posByte -= 3;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
} }
} }
@@ -93,7 +87,6 @@ abstract class GBCommon implements StatelessEncoding {
} else { } else {
// dirty EOF; note how many bytes the last character had // dirty EOF; note how many bytes the last character had
$this->dirtyEOF = ($third ? 3 : ($second ? 2 : 1)); $this->dirtyEOF = ($third ? 3 : ($second ? 2 : 1));
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
} }
} }

4
lib/Encoding/GenericEncoding.php

@@ -159,9 +159,9 @@ trait GenericEncoding {
} }
/** Handles decoding errors */ /** Handles decoding errors */
protected function errDec(int $mode, int $charOffset = -1, int $byteOffset = -1) { protected function errDec(int $mode, int $charOffset, int $byteOffset) {
assert(in_array($mode, [self::MODE_NULL, self::MODE_REPLACE, self::MODE_FATAL]), "Invalid error mode $mode"); assert(in_array($mode, [self::MODE_NULL, self::MODE_REPLACE, self::MODE_FATAL]), "Invalid error mode $mode");
assert($mode !== self::MODE_FATAL || ($charOffset > -1 && $byteOffset > -1), "Offsets for error reporting not supplied"); $this->posErr = $this->posChar;
switch ($mode) { switch ($mode) {
case self::MODE_NULL: case self::MODE_NULL:
// used internally during backward seeking for some encodings // used internally during backward seeking for some encodings

2
lib/Encoding/UTF16.php

@@ -33,6 +33,7 @@ abstract class UTF16 implements Encoding {
return $lead_s; return $lead_s;
} else { } else {
$this->posByte -= 2; $this->posByte -= 2;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
} }
} else { } else {
@@ -43,6 +44,7 @@ abstract class UTF16 implements Encoding {
if ($this->allowSurrogates) { if ($this->allowSurrogates) {
return $code; return $code;
} else { } else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2); return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
} }
} else { } else {

2
lib/Encoding/UTF8.php

@@ -52,11 +52,9 @@ class UTF8 implements StatelessEncoding {
} }
$point = $b & 0x7; $point = $b & 0x7;
} else { // invalid byte } else { // invalid byte
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar, $this->posByte); return $this->errDec($this->errMode, $this->posChar, $this->posByte);
} }
} elseif ($b < $lower || $b > $upper) { } elseif ($b < $lower || $b > $upper) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar, $this->posByte--); return $this->errDec($this->errMode, $this->posChar, $this->posByte--);
} else { } else {
$lower = 0x80; $lower = 0x80;

Loading…
Cancel
Save