From 6417e8f0be8ecef8888ab680d337d6ef3b75326d Mon Sep 17 00:00:00 2001 From: "J. King" Date: Thu, 1 Oct 2020 18:52:33 -0400 Subject: [PATCH] Start overhauling error handling; adjust coverage annotations --- lib/Encoding/AbstractEncoding.php | 49 +++++++++++++++++------ tests/cases/Encoding/TestBig5.php | 4 +- tests/cases/Encoding/TestEUCKR.php | 4 +- tests/cases/Encoding/TestGB18030.php | 6 +-- tests/cases/Encoding/TestSingleByte.php | 4 +- tests/cases/Encoding/TestUTF16LE.php | 2 +- tests/cases/Encoding/TestUTF8.php | 4 +- tests/cases/Encoding/TestXUserDefined.php | 2 +- 8 files changed, 49 insertions(+), 26 deletions(-) diff --git a/lib/Encoding/AbstractEncoding.php b/lib/Encoding/AbstractEncoding.php index 93bc5e3..7fbffab 100644 --- a/lib/Encoding/AbstractEncoding.php +++ b/lib/Encoding/AbstractEncoding.php @@ -7,15 +7,32 @@ declare(strict_types=1); namespace MensBeam\Intl\Encoding; abstract class AbstractEncoding implements Encoding { + /** @var string $string The string being decoded */ protected $string; + /** @var int $posByte The current byte position in the string */ protected $posByte = 0; + /** @var int $posChar The current character (code point) position in the string */ protected $posChar = 0; + /** @var int $lenByte The length of the string, in bytes */ protected $lenByte = null; + /** @var int $lenChar The length of the string in characters, if known */ protected $lenChar = null; + /** To be removed */ protected $dirtyEOF = 0; + /** @var array $errStack A list of error data to aid in backwards seeking; the most recent error is kept off the stack */ + protected $errStack = []; + /** @var int $errMark The byte position marking the most recent error. 
The one or more bytes previous to this position constitute an invalid character */ + protected $errMark = -1; + /** @var int $errSync The byte position to which to move to skip over the most recent erroneous character */ + protected $errSync = -2; + /** @var int $errMode The selected error mode (fatal or replace) */ protected $errMode = self::MODE_REPLACE; + /** @var bool $allowSurrogates Whether surrogates in encodings other than UTF-16 should be passed through */ protected $allowSurrogates = false; + /** @var bool $selfSynchronizing Whether the concrete class represents a self-synchronizing decoder. Such decoders do not use the error stack */ protected $selfSynchronizing = false; + /** @var string[] $stateProps The list of properties which constitute state which must be saved when peeking/seeking; some encodings may add to this list for their own purposes */ + protected $stateProps = ["posChar", "posByte", "posErr", "errStack", "errMark", "errSync"]; public $posErr = 0; @@ -147,11 +164,11 @@ abstract class AbstractEncoding implements Encoding { /** Returns a copy of the decoder's state to keep in memory */ protected function stateSave(): array { - return [ - 'posChar' => $this->posChar, - 'posByte' => $this->posByte, - 'posErr' => $this->posErr, - ]; + $out = []; + foreach ($this->stateProps as $prop) { + $out[$prop] = $this->$prop; + } + return $out; } /** Sets the decoder's state to the values specified */ @@ -164,16 +181,22 @@ abstract class AbstractEncoding implements Encoding { /** Handles decoding errors */ protected function errDec(int $mode, int $charOffset, int $byteOffset) { assert(in_array($mode, [self::MODE_NULL, self::MODE_REPLACE, self::MODE_FATAL]), "Invalid error mode $mode"); - $this->posErr = $this->posChar; - switch ($mode) { - case self::MODE_NULL: - // used internally during backward seeking for some encodings - return null; // @codeCoverageIgnore - case self::MODE_REPLACE: - return 0xFFFD; - case self::MODE_FATAL: + if ($mode !==
self::MODE_NULL) { + // expose the error to the user; this disambiguates a literal replacement character + $this->posErr = $this->posChar; + // unless the decoder is self-synchronizing, mark the error so that it can be skipped when seeking back + if (!$this->selfSynchronizing) { + $this->errStack[] = [$this->errMark, $this->errSync]; + $this->errMark = $this->posByte; + $this->errSync = $byteOffset; + } + if ($mode === self::MODE_FATAL) { throw new DecoderException("Invalid code sequence at character offset $charOffset (byte offset $byteOffset)", self::E_INVALID_BYTE); + } else { + return 0xFFFD; + } } + return null; } /** Handles encoding errors */ diff --git a/tests/cases/Encoding/TestBig5.php b/tests/cases/Encoding/TestBig5.php index 68ef824..8bfeef3 100644 --- a/tests/cases/Encoding/TestBig5.php +++ b/tests/cases/Encoding/TestBig5.php @@ -31,7 +31,7 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints * @covers MensBeam\Intl\Encoding\Big5::encode - * @covers MensBeam\Intl\Encoding\Big5::err + * @covers MensBeam\Intl\Encoding\Big5::errEnc */ public function testEncodeCodePoints(bool $fatal, $input, $exp) { return parent::testEncodeCodePoints($fatal, $input, $exp); @@ -112,7 +112,7 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { } /** - * @covers MensBeam\Intl\Encoding\Big5::err + * @covers MensBeam\Intl\Encoding\Big5::errDec */ public function testReplacementModes() { return parent::testReplacementModes(); diff --git a/tests/cases/Encoding/TestEUCKR.php b/tests/cases/Encoding/TestEUCKR.php index c8eb2bc..1ea5f94 100644 --- a/tests/cases/Encoding/TestEUCKR.php +++ b/tests/cases/Encoding/TestEUCKR.php @@ -31,7 +31,7 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints * @covers MensBeam\Intl\Encoding\EUCKR::encode - * @covers MensBeam\Intl\Encoding\EUCKR::err + * @covers MensBeam\Intl\Encoding\EUCKR::errEnc */ public function 
testEncodeCodePoints(bool $fatal, $input, $exp) { return parent::testEncodeCodePoints($fatal, $input, $exp); @@ -112,7 +112,7 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { } /** - * @covers MensBeam\Intl\Encoding\EUCKR::err + * @covers MensBeam\Intl\Encoding\EUCKR::errDec */ public function testReplacementModes() { return parent::testReplacementModes(); diff --git a/tests/cases/Encoding/TestGB18030.php b/tests/cases/Encoding/TestGB18030.php index 331cc9a..63de0ed 100644 --- a/tests/cases/Encoding/TestGB18030.php +++ b/tests/cases/Encoding/TestGB18030.php @@ -36,9 +36,9 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints * @covers MensBeam\Intl\Encoding\GB18030::encode - * @covers MensBeam\Intl\Encoding\GB18030::err + * @covers MensBeam\Intl\Encoding\GB18030::errEnc * @covers MensBeam\Intl\Encoding\GBK::encode - * @covers MensBeam\Intl\Encoding\GBK::err + * @covers MensBeam\Intl\Encoding\GBK::errEnc */ public function testEncodeCodePoints(bool $fatal, $input, $exp, $class = self::class) { $this->testedClass = $class; @@ -120,7 +120,7 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { } /** - * @covers MensBeam\Intl\Encoding\GB18030::err + * @covers MensBeam\Intl\Encoding\GB18030::errDec */ public function testReplacementModes() { return parent::testReplacementModes(); diff --git a/tests/cases/Encoding/TestSingleByte.php b/tests/cases/Encoding/TestSingleByte.php index 36399ce..b0cdb4d 100644 --- a/tests/cases/Encoding/TestSingleByte.php +++ b/tests/cases/Encoding/TestSingleByte.php @@ -186,7 +186,7 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideBrokenStrings - * @covers MensBeam\Intl\Encoding\SingleByteEncoding::err + * @covers MensBeam\Intl\Encoding\SingleByteEncoding::errDec */ public function testReplacementModes(string $input = "", string $class = SingleByteEncoding::class) { $this->testedClass = $class; @@ -286,7 +286,7 @@ 
class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideInvalids * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode - * @covers MensBeam\Intl\Encoding\SingleByteEncoding::err + * @covers MensBeam\Intl\Encoding\SingleByteEncoding::errEnc */ public function testEncodeInvalidCodePoints(string $class, bool $mode, int $input, $exp) { if ($exp instanceof \Throwable) { diff --git a/tests/cases/Encoding/TestUTF16LE.php b/tests/cases/Encoding/TestUTF16LE.php index 46290e6..0d4f4ad 100644 --- a/tests/cases/Encoding/TestUTF16LE.php +++ b/tests/cases/Encoding/TestUTF16LE.php @@ -102,7 +102,7 @@ class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest { } /** - * @covers MensBeam\Intl\Encoding\UTF16::err + * @covers MensBeam\Intl\Encoding\UTF16::errDec */ public function testReplacementModes() { return parent::testReplacementModes(); diff --git a/tests/cases/Encoding/TestUTF8.php b/tests/cases/Encoding/TestUTF8.php index 19304f8..61d74be 100644 --- a/tests/cases/Encoding/TestUTF8.php +++ b/tests/cases/Encoding/TestUTF8.php @@ -31,7 +31,7 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints * @covers MensBeam\Intl\Encoding\UTF8::encode - * @covers MensBeam\Intl\Encoding\UTF8::err + * @covers MensBeam\Intl\Encoding\UTF8::errEnc */ public function testEncodeCodePoints(bool $fatal, $input, $exp) { return parent::testEncodeCodePoints($fatal, $input, $exp); @@ -112,7 +112,7 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { } /** - * @covers MensBeam\Intl\Encoding\UTF8::err + * @covers MensBeam\Intl\Encoding\UTF8::errDec */ public function testReplacementModes() { return parent::testReplacementModes(); diff --git a/tests/cases/Encoding/TestXUserDefined.php b/tests/cases/Encoding/TestXUserDefined.php index 28aefd2..42caeb5 100644 --- a/tests/cases/Encoding/TestXUserDefined.php +++ b/tests/cases/Encoding/TestXUserDefined.php @@ -93,7 +93,7 @@ class TestXUserDefined extends 
\MensBeam\Intl\Test\DecoderTest { } /** - * @covers MensBeam\Intl\Encoding\XUserDefined::err + * @covers MensBeam\Intl\Encoding\XUserDefined::errDec */ public function testReplacementModes() { return parent::testReplacementModes();