Browse Source

Make posErr fully generic

J. King 4 years ago
parent
commit
87e34b3074
  1. 3
      lib/Encoding/Big5.php
  2. 4
      lib/Encoding/EUCKR.php
  3. 7
      lib/Encoding/GBCommon.php
  4. 4
      lib/Encoding/GenericEncoding.php
  5. 2
      lib/Encoding/UTF16.php
  6. 2
      lib/Encoding/UTF8.php

3
lib/Encoding/Big5.php

@ -62,10 +62,8 @@ class Big5 implements StatelessEncoding {
return $code;
} else {
if ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1);
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2);
}
}
@ -79,7 +77,6 @@ class Big5 implements StatelessEncoding {
} else {
// dirty EOF
$this->dirtyEOF = 1;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
}
}

4
lib/Encoding/EUCKR.php

@ -33,7 +33,6 @@ class EUCKR implements StatelessEncoding {
if ($b < 0x80) {
return $b;
} elseif ($b == 0x80 || $b == 0xFF) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 1);
} else {
$lead = $b;
@ -49,10 +48,8 @@ class EUCKR implements StatelessEncoding {
return $code;
} else {
if ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1);
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2);
}
}
@ -66,7 +63,6 @@ class EUCKR implements StatelessEncoding {
} else {
// dirty EOF
$this->dirtyEOF = 1;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
}
}

7
lib/Encoding/GBCommon.php

@ -29,7 +29,6 @@ abstract class GBCommon implements StatelessEncoding {
$first = $b;
continue;
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
}
} elseif ($second === 0) {
@ -42,10 +41,8 @@ abstract class GBCommon implements StatelessEncoding {
$pointer = ($first - 0x81) * 190 + ($b - $offset);
return self::TABLE_GBK[$pointer];
} elseif ($b < 0x80) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte);
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
}
}
@ -55,7 +52,6 @@ abstract class GBCommon implements StatelessEncoding {
continue;
} else {
$this->posByte -= 2;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
}
} else {
@ -75,12 +71,10 @@ abstract class GBCommon implements StatelessEncoding {
if (isset($codePointOffset)) {
return $codePointOffset + $pointer - $offset;
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
}
} else {
$this->posByte -= 3;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
}
}
@ -93,7 +87,6 @@ abstract class GBCommon implements StatelessEncoding {
} else {
// dirty EOF; note how many bytes the last character had
$this->dirtyEOF = ($third ? 3 : ($second ? 2 : 1));
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
}
}

4
lib/Encoding/GenericEncoding.php

@ -159,9 +159,9 @@ trait GenericEncoding {
}
/** Handles decoding errors */
protected function errDec(int $mode, int $charOffset = -1, int $byteOffset = -1) {
protected function errDec(int $mode, int $charOffset, int $byteOffset) {
assert(in_array($mode, [self::MODE_NULL, self::MODE_REPLACE, self::MODE_FATAL]), "Invalid error mode $mode");
assert($mode !== self::MODE_FATAL || ($charOffset > -1 && $byteOffset > -1), "Offsets for error reporting not supplied");
$this->posErr = $this->posChar;
switch ($mode) {
case self::MODE_NULL:
// used internally during backward seeking for some encodings

2
lib/Encoding/UTF16.php

@ -33,6 +33,7 @@ abstract class UTF16 implements Encoding {
return $lead_s;
} else {
$this->posByte -= 2;
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
}
} else {
@ -43,6 +44,7 @@ abstract class UTF16 implements Encoding {
if ($this->allowSurrogates) {
return $code;
} else {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
}
} else {

2
lib/Encoding/UTF8.php

@ -52,11 +52,9 @@ class UTF8 implements StatelessEncoding {
}
$point = $b & 0x7;
} else { // invalid byte
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar, $this->posByte);
}
} elseif ($b < $lower || $b > $upper) {
$this->posErr = $this->posChar;
return $this->errDec($this->errMode, $this->posChar, $this->posByte--);
} else {
$lower = 0x80;

Loading…
Cancel
Save