From 87e34b307454c1d412bef80d7226db32962b5f5e Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 25 Sep 2020 17:22:10 -0400 Subject: [PATCH] Make posErr fully generic --- lib/Encoding/Big5.php | 3 --- lib/Encoding/EUCKR.php | 4 ---- lib/Encoding/GBCommon.php | 7 ------- lib/Encoding/GenericEncoding.php | 4 ++-- lib/Encoding/UTF16.php | 2 ++ lib/Encoding/UTF8.php | 2 -- 6 files changed, 4 insertions(+), 18 deletions(-) diff --git a/lib/Encoding/Big5.php b/lib/Encoding/Big5.php index cb237a4..f22d5c4 100644 --- a/lib/Encoding/Big5.php +++ b/lib/Encoding/Big5.php @@ -62,10 +62,8 @@ class Big5 implements StatelessEncoding { return $code; } else { if ($b < 0x80) { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1); } else { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2); } } @@ -79,7 +77,6 @@ class Big5 implements StatelessEncoding { } else { // dirty EOF $this->dirtyEOF = 1; - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); } } diff --git a/lib/Encoding/EUCKR.php b/lib/Encoding/EUCKR.php index 2345f3b..4eda27c 100644 --- a/lib/Encoding/EUCKR.php +++ b/lib/Encoding/EUCKR.php @@ -33,7 +33,6 @@ class EUCKR implements StatelessEncoding { if ($b < 0x80) { return $b; } elseif ($b == 0x80 || $b == 0xFF) { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 1); } else { $lead = $b; @@ -49,10 +48,8 @@ class EUCKR implements StatelessEncoding { return $code; } else { if ($b < 0x80) { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar -1, --$this->posByte - 1); } else { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar -1, $this->posByte - 2); } } @@ -66,7 +63,6 @@ class EUCKR implements StatelessEncoding { } else { // dirty EOF $this->dirtyEOF = 1; - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); } } diff --git a/lib/Encoding/GBCommon.php b/lib/Encoding/GBCommon.php index 29a2d31..c48b361 100644 --- a/lib/Encoding/GBCommon.php +++ b/lib/Encoding/GBCommon.php @@ -29,7 +29,6 @@ abstract class GBCommon implements StatelessEncoding { $first = $b; continue; } else { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); } } elseif ($second === 0) { @@ -42,10 +41,8 @@ abstract class GBCommon implements StatelessEncoding { $pointer = ($first - 0x81) * 190 + ($b - $offset); return self::TABLE_GBK[$pointer]; } elseif ($b < 0x80) { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte); } else { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); } } @@ -55,7 +52,6 @@ abstract class GBCommon implements StatelessEncoding { continue; } else { $this->posByte -= 2; - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); } } else { @@ -75,12 +71,10 @@ abstract class GBCommon implements StatelessEncoding { if (isset($codePointOffset)) { return $codePointOffset + $pointer - $offset; } else { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); } } else { $this->posByte -= 3; - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1); } } @@ -93,7 +87,6 @@ abstract class GBCommon implements StatelessEncoding { } else { // dirty EOF; note how many bytes the last character had $this->dirtyEOF = ($third ? 3 : ($second ? 2 : 1)); - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF); } } diff --git a/lib/Encoding/GenericEncoding.php b/lib/Encoding/GenericEncoding.php index e6435d9..ffb38da 100644 --- a/lib/Encoding/GenericEncoding.php +++ b/lib/Encoding/GenericEncoding.php @@ -159,9 +159,9 @@ trait GenericEncoding { } /** Handles decoding errors */ - protected function errDec(int $mode, int $charOffset = -1, int $byteOffset = -1) { + protected function errDec(int $mode, int $charOffset, int $byteOffset) { assert(in_array($mode, [self::MODE_NULL, self::MODE_REPLACE, self::MODE_FATAL]), "Invalid error mode $mode"); - assert($mode !== self::MODE_FATAL || ($charOffset > -1 && $byteOffset > -1), "Offsets for error reporting not supplied"); + $this->posErr = $this->posChar; switch ($mode) { case self::MODE_NULL: // used internally during backward seeking for some encodings diff --git a/lib/Encoding/UTF16.php b/lib/Encoding/UTF16.php index 7f1bad9..5bf0b01 100644 --- a/lib/Encoding/UTF16.php +++ b/lib/Encoding/UTF16.php @@ -33,6 +33,7 @@ abstract class UTF16 implements Encoding { return $lead_s; } else { $this->posByte -= 2; + $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2); } } else { @@ -43,6 +44,7 @@ abstract class UTF16 implements Encoding { if ($this->allowSurrogates) { return $code; } else { + $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2); } } else { diff --git a/lib/Encoding/UTF8.php b/lib/Encoding/UTF8.php index c3cebb3..3b7835c 100644 --- a/lib/Encoding/UTF8.php +++ b/lib/Encoding/UTF8.php @@ -52,11 +52,9 @@ class UTF8 implements StatelessEncoding { } $point = $b & 0x7; } else { // invalid byte - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar, $this->posByte); } } elseif ($b < $lower || $b > $upper) { - $this->posErr = $this->posChar; return $this->errDec($this->errMode, $this->posChar, $this->posByte--); } else { $lower = 0x80;