From 10328b6806c5bb08fbecb62a542785e579f99c5d Mon Sep 17 00:00:00 2001 From: "J. King" Date: Thu, 15 Oct 2020 16:19:57 -0400 Subject: [PATCH] Tests for general encoder --- lib/Encoding/Encoder.php | 43 ++-- tests/cases/Encoding/TestBig5.php | 94 +++++---- tests/cases/Encoding/TestEUCJP.php | 118 ++++++----- tests/cases/Encoding/TestEUCKR.php | 74 ++++--- tests/cases/Encoding/TestGB18030.php | 233 ++++++++++++---------- tests/cases/Encoding/TestISO2022JP.php | 10 + tests/cases/Encoding/TestShiftJIS.php | 100 +++++----- tests/cases/Encoding/TestSingleByte.php | 12 ++ tests/cases/Encoding/TestUTF8.php | 154 +++++++------- tests/cases/Encoding/TestXUserDefined.php | 10 + tests/lib/CoderDecoderTest.php | 21 ++ 11 files changed, 502 insertions(+), 367 deletions(-) diff --git a/lib/Encoding/Encoder.php b/lib/Encoding/Encoder.php index 9c7bc4f..b515aa7 100644 --- a/lib/Encoding/Encoder.php +++ b/lib/Encoding/Encoder.php @@ -22,14 +22,31 @@ class Encoder { if (!$l || !$l['encoder']) { throw new EncoderException("Label '$label' does not have an encoder", Encoder::E_UNAVAILABLE_ENCODER); } else { - $this->name = $s['name']; + $this->name = $l['name']; $this->fatal = $fatal; } } - public function encode(int $codePoint): string { + public function encode(iterable $codePoints): string { + $oldMode = $this->mode; + $this->reset(); + $out = ""; + try { + foreach ($codePoints as $codePoint) { + $out .= $this->encodeChar($codePoint); + } + if ($this->name === "ISO-2022-JP" && $this->mode !== self::MODE_ASCII) { + $out .= "\x1B\x28\x42"; + } + } finally { + $this->mode = $oldMode; + } + return $out; + } + + public function encodeChar(int $codePoint): string { if ($codePoint < 0 || $codePoint > 0x10FFFF) { - throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT); + throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", Encoding::E_INVALID_CODE_POINT); } switch ($this->name) { case "UTF-8": @@ 
-80,23 +97,25 @@ class Encoder { return Macintosh::encode($codePoint, $this->fatal); case "Shift_JIS": return ShiftJIS::encode($codePoint, $this->fatal); - case "windows1250": + case "windows-1250": return Windows1250::encode($codePoint, $this->fatal); - case "windows1251": + case "windows-1251": return Windows1251::encode($codePoint, $this->fatal); - case "windows1252": + case "windows-1252": return Windows1252::encode($codePoint, $this->fatal); - case "windows1253": + case "windows-1253": return Windows1253::encode($codePoint, $this->fatal); - case "windows1254": + case "windows-1254": return Windows1254::encode($codePoint, $this->fatal); - case "windows1255": + case "windows-1255": return Windows1255::encode($codePoint, $this->fatal); - case "windows1256": + case "windows-1256": return Windows1256::encode($codePoint, $this->fatal); - case "windows1257": + case "windows-1257": return Windows1257::encode($codePoint, $this->fatal); - case "windows874": + case "windows-1258": + return Windows1258::encode($codePoint, $this->fatal); + case "windows-874": return Windows874::encode($codePoint, $this->fatal); case "x-mac-cyrillic": return XMacCyrillic::encode($codePoint, $this->fatal); diff --git a/tests/cases/Encoding/TestBig5.php b/tests/cases/Encoding/TestBig5.php index d380de9..a899023 100644 --- a/tests/cases/Encoding/TestBig5.php +++ b/tests/cases/Encoding/TestBig5.php @@ -28,8 +28,51 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + public function provideCodePoints() { + return [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+00CA (HTML)' => [false, 0xCA, bin2hex("Ê")], + 'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+3007 (HTML)' => [false, 0x3007, "C6 E2"], + 'U+3007 (fatal)' => [true, 0x3007, "C6 E2"], + 'U+5341 (HTML)' 
=> [false, 0x5341, "A4 51"], + 'U+5341 (fatal)' => [true, 0x5341, "A4 51"], + 'U+2561 (HTML)' => [false, 0x2561, "F9 EB"], + 'U+2561 (fatal)' => [true, 0x2561, "F9 EB"], + 'U+256D (HTML)' => [false, 0x256D, "A2 7E"], + 'U+256D (fatal)' => [true, 0x256D, "A2 7E"], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + } + + public function provideStrings() { + return [ + 'empty string' => ["", []], + 'sanity check' => ["40", [64]], + 'two-byte character' => ["D7 D7", [36290]], + 'EOF after first byte' => ["D7", [65533]], + 'low byte after first byte' => ["D7 39", [65533, 57]], + '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]], + '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]], + 'invalid high byte as first byte' => ["81 D7 00", [65533, 0]], + '0x7F after first byte' => ["D7 7F", [65533, 127]], + '0xFF after first byte' => ["D7 FF", [65533]], + 'invalid high byte after first byte' => ["D7 81", [65533]], + 'broken string' => ["00 FF 00", [0, 65533, 0]], + 'double-characters low' => ["88 62 88 64", [202, 772, 202, 780]], + 'double-characters high' => ["88 A3 88 A5", [234, 772, 234, 780]], + 'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 34508, 27700, 202, 772, 234, 780]], + 'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 36290, 36290, 98]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\Big5::encode * @covers MensBeam\Intl\Encoding\Big5::errEnc */ @@ -37,6 +80,15 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers 
MensBeam\Intl\Encoding\Big5::encode + * @covers MensBeam\Intl\Encoding\Big5::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\Big5::__construct @@ -143,48 +195,6 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } - public function provideCodePoints() { - return [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => [true, 0x64, "64"], - 'U+00CA (HTML)' => [false, 0xCA, bin2hex("Ê")], - 'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+3007 (HTML)' => [false, 0x3007, "C6 E2"], - 'U+3007 (fatal)' => [true, 0x3007, "C6 E2"], - 'U+5341 (HTML)' => [false, 0x5341, "A4 51"], - 'U+5341 (fatal)' => [true, 0x5341, "A4 51"], - 'U+2561 (HTML)' => [false, 0x2561, "F9 EB"], - 'U+2561 (fatal)' => [true, 0x2561, "F9 EB"], - 'U+256D (HTML)' => [false, 0x256D, "A2 7E"], - 'U+256D (fatal)' => [true, 0x256D, "A2 7E"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - } - - public function provideStrings() { - return [ - 'empty string' => ["", []], - 'sanity check' => ["40", [64]], - 'two-byte character' => ["D7 D7", [36290]], - 'EOF after first byte' => ["D7", [65533]], - 'low byte after first byte' => ["D7 39", [65533, 57]], - '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]], - '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]], - 'invalid high byte as first byte' => ["81 D7 00", [65533, 0]], - '0x7F after first byte' => ["D7 7F", [65533, 
127]], - '0xFF after first byte' => ["D7 FF", [65533]], - 'invalid high byte after first byte' => ["D7 81", [65533]], - 'broken string' => ["00 FF 00", [0, 65533, 0]], - 'double-characters low' => ["88 62 88 64", [202, 772, 202, 780]], - 'double-characters high' => ["88 A3 88 A5", [234, 772, 234, 780]], - 'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 34508, 27700, 202, 772, 234, 780]], - 'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 36290, 36290, 98]], - ]; - } - /** * @group optional */ diff --git a/tests/cases/Encoding/TestEUCJP.php b/tests/cases/Encoding/TestEUCJP.php index 962c2ef..e1354d1 100644 --- a/tests/cases/Encoding/TestEUCJP.php +++ b/tests/cases/Encoding/TestEUCJP.php @@ -28,8 +28,63 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest { /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + public function provideCodePoints() { + return [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+00A5 (HTML)' => [false, 0xA5, "5C"], + 'U+00A5 (fatal)' => [true, 0xA5, "5C"], + 'U+203E (HTML)' => [false, 0x203E, "7E"], + 'U+203E (fatal)' => [true, 0x203E, "7E"], + 'U+3088 (HTML)' => [false, 0x3088, "A4 E8"], + 'U+3088 (fatal)' => [true, 0x3088, "A4 E8"], + 'U+FF96 (HTML)' => [false, 0xFF96, "8E D6"], + 'U+FF96 (fatal)' => [true, 0xFF96, "8E D6"], + 'U+2212 (HTML)' => [false, 0x2212, "A1 DD"], + 'U+2212 (fatal)' => [true, 0x2212, "A1 DD"], + 'U+00E6 (HTML)' => [false, 0xE6, bin2hex("æ")], + 'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+FFE2 (HTML)' => [false, 0xFFE2, "A2 CC"], + 'U+FFE2 (fatal)' => [true, 0xFFE2, "A2 CC"], + 'U+2116 (HTML)' => [false, 0x2116, "AD E2"], + 'U+2116 (fatal)' => [true, 0x2116, "AD E2"], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", 
Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + } + + public function provideStrings() { + return [ + 'empty string' => ["", []], + 'sanity check' => ["40", [64]], + 'former ASCII deviations' => ["5C 7E", [92, 126]], + 'changed multibyte index' => ["A1DD", [65293]], + 'JIS X 0201 range' => ["8EA1 8EDF", [65377, 65439]], + 'JIS X 0201 bogus range' => ["8EA0 8EE0", [65533, 65533]], + 'JIS X 0201 truncated character 1' => ["8E", [65533]], + 'JIS X 0201 truncated character 2' => ["8E 20", [65533, 32]], + 'JIS X 0201 truncated character 3' => ["8E FF", [65533]], + 'JIS X 0212 assigned range' => ["8FA2AF 8FEDE3", [728, 40869]], + 'JIS X 0212 total range' => ["8FA1A1 8FFEFE", [65533, 65533]], + 'JIS X 0212 bogus range 1' => ["8FA0A1 8FFFFE", [65533, 65533, 65533, 65533]], + 'JIS X 0212 bogus range 2' => ["8FA1A0 8FFEFF", [65533, 65533]], + 'JIS X 0212 truncated character 1' => ["8FA2", [65533]], + 'JIS X 0212 truncated character 2' => ["8FA2 20", [65533, 32]], + 'JIS X 0212 truncated character 3' => ["8FA2 FF", [65533]], + 'JIS X 0208 assigned range' => ["A1A1 FCFE", [12288, 65282]], + 'JIS X 0208 total range' => ["A1A1 FEFE", [12288, 65533]], + 'JIS X 0208 bogus range' => ["A1A0 A0FE", [65533, 65533, 65533]], + 'JIS X 0208 truncated character 1' => ["A1", [65533]], + 'JIS X 0208 truncated character 2' => ["A1 20", [65533, 32]], + 'JIS X 0208 truncated character 3' => ["A1 FF", [65533]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\EUCJP::encode * @covers MensBeam\Intl\Encoding\EUCJP::errEnc */ @@ -37,6 +92,15 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers 
MensBeam\Intl\Encoding\EUCJP::encode + * @covers MensBeam\Intl\Encoding\EUCJP::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\EUCJP::__construct @@ -142,60 +206,6 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } - public function provideCodePoints() { - return [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => [true, 0x64, "64"], - 'U+00A5 (HTML)' => [false, 0xA5, "5C"], - 'U+00A5 (fatal)' => [true, 0xA5, "5C"], - 'U+203E (HTML)' => [false, 0x203E, "7E"], - 'U+203E (fatal)' => [true, 0x203E, "7E"], - 'U+3088 (HTML)' => [false, 0x3088, "A4 E8"], - 'U+3088 (fatal)' => [true, 0x3088, "A4 E8"], - 'U+FF96 (HTML)' => [false, 0xFF96, "8E D6"], - 'U+FF96 (fatal)' => [true, 0xFF96, "8E D6"], - 'U+2212 (HTML)' => [false, 0x2212, "A1 DD"], - 'U+2212 (fatal)' => [true, 0x2212, "A1 DD"], - 'U+00E6 (HTML)' => [false, 0xE6, bin2hex("æ")], - 'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+FFE2 (HTML)' => [false, 0xFFE2, "A2 CC"], - 'U+FFE2 (fatal)' => [true, 0xFFE2, "A2 CC"], - 'U+2116 (HTML)' => [false, 0x2116, "AD E2"], - 'U+2116 (fatal)' => [true, 0x2116, "AD E2"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - } - - public function provideStrings() { - return [ - 'empty string' => ["", []], - 'sanity check' => ["40", [64]], - 'former ASCII deviations' => ["5C 7E", [92, 126]], - 'changed multibyte index' => ["A1DD", [65293]], - 'JIS X 
0201 range' => ["8EA1 8EDF", [65377, 65439]], - 'JIS X 0201 bogus range' => ["8EA0 8EE0", [65533, 65533]], - 'JIS X 0201 truncated character 1' => ["8E", [65533]], - 'JIS X 0201 truncated character 2' => ["8E 20", [65533, 32]], - 'JIS X 0201 truncated character 3' => ["8E FF", [65533]], - 'JIS X 0212 assigned range' => ["8FA2AF 8FEDE3", [728, 40869]], - 'JIS X 0212 total range' => ["8FA1A1 8FFEFE", [65533, 65533]], - 'JIS X 0212 bogus range 1' => ["8FA0A1 8FFFFE", [65533, 65533, 65533, 65533]], - 'JIS X 0212 bogus range 2' => ["8FA1A0 8FFEFF", [65533, 65533]], - 'JIS X 0212 truncated character 1' => ["8FA2", [65533]], - 'JIS X 0212 truncated character 2' => ["8FA2 20", [65533, 32]], - 'JIS X 0212 truncated character 3' => ["8FA2 FF", [65533]], - 'JIS X 0208 assigned range' => ["A1A1 FCFE", [12288, 65282]], - 'JIS X 0208 total range' => ["A1A1 FEFE", [12288, 65533]], - 'JIS X 0208 bogus range' => ["A1A0 A0FE", [65533, 65533, 65533]], - 'JIS X 0208 truncated character 1' => ["A1", [65533]], - 'JIS X 0208 truncated character 2' => ["A1 20", [65533, 32]], - 'JIS X 0208 truncated character 3' => ["A1 FF", [65533]], - ]; - } - /** * @group optional */ diff --git a/tests/cases/Encoding/TestEUCKR.php b/tests/cases/Encoding/TestEUCKR.php index f518b40..c3eb7c9 100644 --- a/tests/cases/Encoding/TestEUCKR.php +++ b/tests/cases/Encoding/TestEUCKR.php @@ -28,8 +28,41 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + public function provideCodePoints() { + return [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+00CA (HTML)' => [false, 0xCA, bin2hex("Ê")], + 'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+ACF2 (HTML)' => [false, 0xACF2, "81 E9"], + 'U+ACF2 (fatal)' => [true, 0xACF2, "81 E9"], + '-1 (HTML)' => [false, -1, new 
EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + } + + public function provideStrings() { + return [ + 'empty string' => ["", []], + 'sanity check' => ["40", [64]], + 'two-byte character' => ["D7 D7", [21033]], + 'EOF after first byte' => ["D7", [65533]], + 'low byte after first byte' => ["D7 39", [65533, 57]], + '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]], + '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]], + '0x7F after first byte' => ["D7 7F", [65533, 127]], + '0xFF after first byte' => ["D7 FF", [65533]], + 'non-character' => ["A5 DC", [65533]], + 'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]], + 'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\EUCKR::encode * @covers MensBeam\Intl\Encoding\EUCKR::errEnc */ @@ -37,6 +70,15 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\EUCKR::encode + * @covers MensBeam\Intl\Encoding\EUCKR::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\EUCKR::__construct @@ -143,38 +185,6 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } - public function provideCodePoints() { - return [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => 
[true, 0x64, "64"], - 'U+00CA (HTML)' => [false, 0xCA, bin2hex("Ê")], - 'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+ACF2 (HTML)' => [false, 0xACF2, "81 E9"], - 'U+ACF2 (fatal)' => [true, 0xACF2, "81 E9"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - } - - public function provideStrings() { - return [ - 'empty string' => ["", []], - 'sanity check' => ["40", [64]], - 'two-byte character' => ["D7 D7", [21033]], - 'EOF after first byte' => ["D7", [65533]], - 'low byte after first byte' => ["D7 39", [65533, 57]], - '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]], - '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]], - '0x7F after first byte' => ["D7 7F", [65533, 127]], - '0xFF after first byte' => ["D7 FF", [65533]], - 'non-character' => ["A5 DC", [65533]], - 'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]], - 'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]], - ]; - } - /** * @group optional */ diff --git a/tests/cases/Encoding/TestGB18030.php b/tests/cases/Encoding/TestGB18030.php index 7b8524c..bcb9354 100644 --- a/tests/cases/Encoding/TestGB18030.php +++ b/tests/cases/Encoding/TestGB18030.php @@ -33,8 +33,119 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { $this->testedClass = GB18030::class; } + public function provideCodePoints() { + // bytes confirmed using Firefox + $series_gb18030 = [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+20AC (HTML)' => [false, 0x20AC, "A2 E3"], + 'U+20AC (fatal)' => [true, 0x20AC, "A2 E3"], + 'U+2164 (HTML)' => [false, 
0x2164, "A2 F5"], + 'U+2164 (fatal)' => [true, 0x2164, "A2 F5"], + 'U+3A74 (HTML)' => [false, 0x3A74, "82 31 97 30"], + 'U+3A74 (fatal)' => [true, 0x3A74, "82 31 97 30"], + 'U+E7C7 (HTML)' => [false, 0xE7C7, "81 35 F4 37"], + 'U+E7C7 (fatal)' => [true, 0xE7C7, "81 35 F4 37"], + 'U+1D11E (HTML)' => [false, 0x1D11E, "94 32 BE 34"], + 'U+1D11E (fatal)' => [true, 0x1D11E, "94 32 BE 34"], + 'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("")], + 'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+3000 (HTML)' => [false, 0x3000, "A1 A1"], + 'U+3000 (fatal)' => [true, 0x3000, "A1 A1"], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + $series_gbk = [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+20AC (HTML)' => [false, 0x20AC, "80"], + 'U+20AC (fatal)' => [true, 0x20AC, "80"], + 'U+2164 (HTML)' => [false, 0x2164, "A2 F5"], + 'U+2164 (fatal)' => [true, 0x2164, "A2 F5"], + 'U+3A74 (HTML)' => [false, 0x3A74, bin2hex("㩴")], + 'U+3A74 (fatal)' => [true, 0x3A74, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+E7C7 (HTML)' => [false, 0xE7C7, bin2hex("")], + 'U+E7C7 (fatal)' => [true, 0xE7C7, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+1D11E (HTML)' => [false, 0x1D11E, bin2hex("𝄞")], + 'U+1D11E (fatal)' => [true, 0x1D11E, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("")], + 'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+3000 (HTML)' => [false, 0x3000, "A1 A1"], + 'U+3000 (fatal)' => [true, 0x3000, "A1 
A1"], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + foreach ($series_gb18030 as $name => $test) { + array_push($test, GB18030::class); + yield "gb18030 $name" => $test; + } + foreach ($series_gbk as $name => $test) { + array_push($test, GBK::class); + yield "GBK $name" => $test; + } + } + + public function provideStrings() { + return [ + 'empty string' => ["", []], + // valid single characters + 'sanity check' => ["40", [64]], + 'special case for 0x80' => ["80", [8364]], + 'four-byte special case' => ["81 35 F4 37", [59335]], + 'two-byte character' => ["A8 4E", [8735]], + 'four-byte character' => ["82 31 A2 37", [15081]], + // cut sequences + 'EOF after first byte' => ["82", [65533]], + 'EOF after second byte' => ["82 30", [65533]], + 'EOF after third byte' => ["82 30 81", [65533]], + // invalid sequences + 'bad first byte' => ["FF 35 F4 37", [65533, 53, 65533]], + 'bad second byte' => ["81 FF F4 37", [65533, 65533]], + 'bad third byte' => ["81 35 FF 37", [65533, 53, 65533, 55]], + 'bad fourth byte' => ["81 35 F4 FF", [65533, 53, 65533]], + 'control first byte' => ["00 35 F4 37", [0, 53, 65533]], + 'control second byte' => ["81 00 F4 37", [65533, 0, 65533]], + 'control third byte' => ["81 35 00 37", [65533, 53, 0, 55]], + 'control fourth byte' => ["81 35 F4 00", [65533, 53, 65533, 0]], + // invalid sequences with clean EOF + 'bad first byte (padded)' => ["FF 35 F4 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]], + 'bad second byte (padded)' => ["81 FF F4 37 00 00 00 00", [65533, 65533, 55, 0, 0, 0, 0]], + 'bad third byte (padded)' => ["81 35 FF 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]], + 'bad fourth byte (padded)' => 
["81 35 F4 FF 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0]], + 'control first byte (padded)' => ["00 35 F4 37 00 00 00 00", [0, 53, 65533, 55, 0, 0, 0, 0]], + 'control second byte (padded)' => ["81 00 F4 37 00 00 00 00", [65533, 0, 65533, 55, 0, 0, 0, 0]], + 'control third byte (padded)' => ["81 35 00 37 00 00 00 00", [65533, 53, 0, 55, 0, 0, 0, 0]], + 'control fourth byte (padded)' => ["81 35 F4 00 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0, 0]], + // out-of-range sequences + 'void sequence' => ["84 32 A4 39", [65533]], + 'void sequence 2' => ["FE 39 FE 39", [65533]], + // backward seeking tests + 'seek test 1' => ["81 81 81 30", [20118, 65533]], + 'seek test 2' => ["81 81 80", [20118, 8364]], + 'seek test 3' => ["81 81 00", [20118, 0]], + 'seek test 4' => ["81 81 81 00", [20118, 65533, 0]], + 'seek test 5' => ["81 30 30 30", [65533, 48, 48, 48]], + 'seek test 6' => ["81 30 81 81", [65533, 48, 20118]], + 'seek test 7' => ["30 30 81 81", [48, 48, 20118]], + 'seek test 8' => ["F8 83 FE 80", [40229, 18211]], + 'seek test 1 (padded)' => ["00 00 00 00 81 81 81 30 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 48, 0, 0, 0, 0]], + 'seek test 2 (padded)' => ["00 00 00 00 81 81 80 00 00 00 00", [0, 0, 0, 0, 20118, 8364, 0, 0, 0, 0]], + 'seek test 3 (padded)' => ["00 00 00 00 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 0, 0, 0, 0, 0]], + 'seek test 4 (padded)' => ["00 00 00 00 81 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 0, 0, 0, 0, 0]], + 'seek test 5 (padded)' => ["00 00 00 00 81 30 30 30 00 00 00 00", [0, 0, 0, 0, 65533, 48, 48, 48, 0, 0, 0, 0]], + 'seek test 6 (padded)' => ["00 00 00 00 81 30 81 81 00 00 00 00", [0, 0, 0, 0, 65533, 48, 20118, 0, 0, 0, 0]], + 'seek test 7 (padded)' => ["00 00 00 00 30 30 81 81 00 00 00 00", [0, 0, 0, 0, 48, 48, 20118, 0, 0, 0, 0]], + 'seek test 8 (padded)' => ["00 00 00 00 F8 83 FE 80 00 00 00 00", [0, 0, 0, 0, 40229, 18211, 0, 0, 0, 0]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * 
@covers MensBeam\Intl\Encoding\GB18030::encode * @covers MensBeam\Intl\Encoding\GB18030::errEnc * @covers MensBeam\Intl\Encoding\GBK::encode @@ -45,6 +156,18 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\GB18030::encode + * @covers MensBeam\Intl\Encoding\GB18030::errEnc + * @covers MensBeam\Intl\Encoding\GBK::encode + * @covers MensBeam\Intl\Encoding\GBK::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp, $class = self::class) { + $this->testedClass = $class; + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\GB18030::__construct @@ -151,116 +274,6 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } - public function provideCodePoints() { - // bytes confirmed using Firefox - $series_gb18030 = [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => [true, 0x64, "64"], - 'U+20AC (HTML)' => [false, 0x20AC, "A2 E3"], - 'U+20AC (fatal)' => [true, 0x20AC, "A2 E3"], - 'U+2164 (HTML)' => [false, 0x2164, "A2 F5"], - 'U+2164 (fatal)' => [true, 0x2164, "A2 F5"], - 'U+3A74 (HTML)' => [false, 0x3A74, "82 31 97 30"], - 'U+3A74 (fatal)' => [true, 0x3A74, "82 31 97 30"], - 'U+E7C7 (HTML)' => [false, 0xE7C7, "81 35 F4 37"], - 'U+E7C7 (fatal)' => [true, 0xE7C7, "81 35 F4 37"], - 'U+1D11E (HTML)' => [false, 0x1D11E, "94 32 BE 34"], - 'U+1D11E (fatal)' => [true, 0x1D11E, "94 32 BE 34"], - 'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("")], - 'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+3000 (HTML)' => [false, 0x3000, "A1 A1"], - 'U+3000 (fatal)' => [true, 0x3000, "A1 A1"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 
(fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - $series_gbk = [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => [true, 0x64, "64"], - 'U+20AC (HTML)' => [false, 0x20AC, "80"], - 'U+20AC (fatal)' => [true, 0x20AC, "80"], - 'U+2164 (HTML)' => [false, 0x2164, "A2 F5"], - 'U+2164 (fatal)' => [true, 0x2164, "A2 F5"], - 'U+3A74 (HTML)' => [false, 0x3A74, bin2hex("㩴")], - 'U+3A74 (fatal)' => [true, 0x3A74, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+E7C7 (HTML)' => [false, 0xE7C7, bin2hex("")], - 'U+E7C7 (fatal)' => [true, 0xE7C7, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+1D11E (HTML)' => [false, 0x1D11E, bin2hex("𝄞")], - 'U+1D11E (fatal)' => [true, 0x1D11E, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("")], - 'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+3000 (HTML)' => [false, 0x3000, "A1 A1"], - 'U+3000 (fatal)' => [true, 0x3000, "A1 A1"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - foreach ($series_gb18030 as $name => $test) { - array_push($test, GB18030::class); - yield "gb18030 $name" => $test; - } - foreach ($series_gbk as $name => $test) { - array_push($test, GBK::class); - yield "GBK $name" => $test; - } - } - - public function provideStrings() { - return [ - 'empty string' => ["", []], - // valid single 
characters - 'sanity check' => ["40", [64]], - 'special case for 0x80' => ["80", [8364]], - 'four-byte special case' => ["81 35 F4 37", [59335]], - 'two-byte character' => ["A8 4E", [8735]], - 'four-byte character' => ["82 31 A2 37", [15081]], - // cut sequences - 'EOF after first byte' => ["82", [65533]], - 'EOF after second byte' => ["82 30", [65533]], - 'EOF after third byte' => ["82 30 81", [65533]], - // invalid sequences - 'bad first byte' => ["FF 35 F4 37", [65533, 53, 65533]], - 'bad second byte' => ["81 FF F4 37", [65533, 65533]], - 'bad third byte' => ["81 35 FF 37", [65533, 53, 65533, 55]], - 'bad fourth byte' => ["81 35 F4 FF", [65533, 53, 65533]], - 'control first byte' => ["00 35 F4 37", [0, 53, 65533]], - 'control second byte' => ["81 00 F4 37", [65533, 0, 65533]], - 'control third byte' => ["81 35 00 37", [65533, 53, 0, 55]], - 'control fourth byte' => ["81 35 F4 00", [65533, 53, 65533, 0]], - // invalid sequences with clean EOF - 'bad first byte (padded)' => ["FF 35 F4 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]], - 'bad second byte (padded)' => ["81 FF F4 37 00 00 00 00", [65533, 65533, 55, 0, 0, 0, 0]], - 'bad third byte (padded)' => ["81 35 FF 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]], - 'bad fourth byte (padded)' => ["81 35 F4 FF 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0]], - 'control first byte (padded)' => ["00 35 F4 37 00 00 00 00", [0, 53, 65533, 55, 0, 0, 0, 0]], - 'control second byte (padded)' => ["81 00 F4 37 00 00 00 00", [65533, 0, 65533, 55, 0, 0, 0, 0]], - 'control third byte (padded)' => ["81 35 00 37 00 00 00 00", [65533, 53, 0, 55, 0, 0, 0, 0]], - 'control fourth byte (padded)' => ["81 35 F4 00 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0, 0]], - // out-of-range sequences - 'void sequence' => ["84 32 A4 39", [65533]], - 'void sequence 2' => ["FE 39 FE 39", [65533]], - // backward seeking tests - 'seek test 1' => ["81 81 81 30", [20118, 65533]], - 'seek test 2' => ["81 81 80", [20118, 8364]], - 'seek test 3' => 
["81 81 00", [20118, 0]], - 'seek test 4' => ["81 81 81 00", [20118, 65533, 0]], - 'seek test 5' => ["81 30 30 30", [65533, 48, 48, 48]], - 'seek test 6' => ["81 30 81 81", [65533, 48, 20118]], - 'seek test 7' => ["30 30 81 81", [48, 48, 20118]], - 'seek test 8' => ["F8 83 FE 80", [40229, 18211]], - 'seek test 1 (padded)' => ["00 00 00 00 81 81 81 30 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 48, 0, 0, 0, 0]], - 'seek test 2 (padded)' => ["00 00 00 00 81 81 80 00 00 00 00", [0, 0, 0, 0, 20118, 8364, 0, 0, 0, 0]], - 'seek test 3 (padded)' => ["00 00 00 00 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 0, 0, 0, 0, 0]], - 'seek test 4 (padded)' => ["00 00 00 00 81 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 0, 0, 0, 0, 0]], - 'seek test 5 (padded)' => ["00 00 00 00 81 30 30 30 00 00 00 00", [0, 0, 0, 0, 65533, 48, 48, 48, 0, 0, 0, 0]], - 'seek test 6 (padded)' => ["00 00 00 00 81 30 81 81 00 00 00 00", [0, 0, 0, 0, 65533, 48, 20118, 0, 0, 0, 0]], - 'seek test 7 (padded)' => ["00 00 00 00 30 30 81 81 00 00 00 00", [0, 0, 0, 0, 48, 48, 20118, 0, 0, 0, 0]], - 'seek test 8 (padded)' => ["00 00 00 00 F8 83 FE 80 00 00 00 00", [0, 0, 0, 0, 40229, 18211, 0, 0, 0, 0]], - ]; - } - /** * @group optional */ diff --git a/tests/cases/Encoding/TestISO2022JP.php b/tests/cases/Encoding/TestISO2022JP.php index 0c9d064..d8b22e7 100644 --- a/tests/cases/Encoding/TestISO2022JP.php +++ b/tests/cases/Encoding/TestISO2022JP.php @@ -60,6 +60,7 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\ISO2022JP::encode * @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc */ @@ -67,6 +68,15 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\ISO2022JP::encode + * @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc + */ + 
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\ISO2022JP::__construct diff --git a/tests/cases/Encoding/TestShiftJIS.php b/tests/cases/Encoding/TestShiftJIS.php index 8b1d12c..c16f5f1 100644 --- a/tests/cases/Encoding/TestShiftJIS.php +++ b/tests/cases/Encoding/TestShiftJIS.php @@ -28,8 +28,54 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest { /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + public function provideCodePoints() { + return [ + 'U+0064 (HTML)' => [false, 0x64, "64"], + 'U+0064 (fatal)' => [true, 0x64, "64"], + 'U+00A5 (HTML)' => [false, 0xA5, "5C"], + 'U+00A5 (fatal)' => [true, 0xA5, "5C"], + 'U+203E (HTML)' => [false, 0x203E, "7E"], + 'U+203E (fatal)' => [true, 0x203E, "7E"], + 'U+3088 (HTML)' => [false, 0x3088, "82 E6"], + 'U+3088 (fatal)' => [true, 0x3088, "82 E6"], + 'U+FF96 (HTML)' => [false, 0xFF96, "D6"], + 'U+FF96 (fatal)' => [true, 0xFF96, "D6"], + 'U+2212 (HTML)' => [false, 0x2212, "81 7C"], + 'U+2212 (fatal)' => [true, 0x2212, "81 7C"], + 'U+00E6 (HTML)' => [false, 0xE6, bin2hex("&#230;")], + 'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + 'U+FFE2 (HTML)' => [false, 0xFFE2, "81 CA"], + 'U+FFE2 (fatal)' => [true, 0xFFE2, "81 CA"], + 'U+2116 (HTML)' => [false, 0x2116, "87 82"], + 'U+2116 (fatal)' => [true, 0x2116, "87 82"], + 'U+E000 (HTML)' => [false, 0xE000, bin2hex("&#57344;")], + 'U+E000 (fatal)' => [true, 0xE000, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("",
Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + } + + public function provideStrings() { + return [ + 'empty string' => ["", []], + 'sanity check' => ["40", [64]], + 'invalid byte' => ["FF", [65533]], + 'former ASCII deviations' => ["5C 7E", [92, 126]], + 'JIS X 0201 range' => ["A1 DF", [65377, 65439]], + 'EUDC range' => ["F040 F9FC", [57344, 59223]], + 'JIS X 0208 assigned range' => ["8140 FC4B", [12288, 40657]], + 'JIS X 0208 total range' => ["8140 FCFC", [12288, 65533]], + 'JIS X 0208 truncated character 1' => ["81", [65533]], + 'JIS X 0208 truncated character 2' => ["81 20", [65533, 32]], + 'JIS X 0208 truncated character 3' => ["81 FF", [65533]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\ShiftJIS::encode * @covers MensBeam\Intl\Encoding\ShiftJIS::errEnc */ @@ -37,6 +83,15 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\ShiftJIS::encode + * @covers MensBeam\Intl\Encoding\ShiftJIS::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\ShiftJIS::__construct @@ -143,51 +198,6 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } - public function provideCodePoints() { - return [ - 'U+0064 (HTML)' => [false, 0x64, "64"], - 'U+0064 (fatal)' => [true, 0x64, "64"], - 'U+00A5 (HTML)' => [false, 0xA5, "5C"], - 'U+00A5 (fatal)' => [true, 0xA5, "5C"], - 'U+203E (HTML)' => [false, 0x203E, "7E"], - 'U+203E (fatal)' => [true, 0x203E, "7E"], - 'U+3088 (HTML)' => [false, 0x3088, "82 E6"], - 'U+3088 (fatal)' => 
[true, 0x3088, "82 E6"], - 'U+FF96 (HTML)' => [false, 0xFF96, "D6"], - 'U+FF96 (fatal)' => [true, 0xFF96, "D6"], - 'U+2212 (HTML)' => [false, 0x2212, "81 7C"], - 'U+2212 (fatal)' => [true, 0x2212, "81 7C"], - 'U+00E6 (HTML)' => [false, 0xE6, bin2hex("&#230;")], - 'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - 'U+FFE2 (HTML)' => [false, 0xFFE2, "81 CA"], - 'U+FFE2 (fatal)' => [true, 0xFFE2, "81 CA"], - 'U+2116 (HTML)' => [false, 0x2116, "87 82"], - 'U+2116 (fatal)' => [true, 0x2116, "87 82"], - 'U+E000 (HTML)' => [false, 0xE000, bin2hex("&#57344;")], - 'U+E000 (fatal)' => [true, 0xE000, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - } - - public function provideStrings() { - return [ - 'empty string' => ["", []], - 'sanity check' => ["40", [64]], - 'invalid byte' => ["FF", [65533]], - 'former ASCII deviations' => ["5C 7E", [92, 126]], - 'JIS X 0201 range' => ["A1 DF", [65377, 65439]], - 'EUDC range' => ["F040 F9FC", [57344, 59223]], - 'JIS X 0208 assigned range' => ["8140 FC4B", [12288, 40657]], - 'JIS X 0208 total range' => ["8140 FCFC", [12288, 65533]], - 'JIS X 0208 truncated character 1' => ["81", [65533]], - 'JIS X 0208 truncated character 2' => ["81 20", [65533, 32]], - 'JIS X 0208 truncated character 3' => ["81 FF", [65533]], - ]; - } - /** * @group optional */ diff --git a/tests/cases/Encoding/TestSingleByte.php b/tests/cases/Encoding/TestSingleByte.php index 63c17aa..856fa81 100644 --- a/tests/cases/Encoding/TestSingleByte.php +++ b/tests/cases/Encoding/TestSingleByte.php @@ -8,6 +8,7 @@ namespace
MensBeam\Intl\TestCase\Encoding; use MensBeam\Intl\Encoding\SingleByteEncoding; use MensBeam\Intl\Encoding\EncoderException; +use MensBeam\Intl\Encoding\Encoder; class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { // maps taken from https://github.com/web-platform-tests/wpt/blob/d6c29bef8d4bcdfe4f689defca73360b07647d71/encoding/single-byte-decoder.html @@ -83,9 +84,20 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode */ public function testEncodeCodePoints(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) { + $e = new Encoder($class::NAME, $fatal); + $out = $e->encode($input); + $this->assertSame(bin2hex($exp), bin2hex($out)); + } + + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) { $out = ""; foreach ($input as $code) { $out .= $class::encode($code, $fatal); diff --git a/tests/cases/Encoding/TestUTF8.php b/tests/cases/Encoding/TestUTF8.php index 05ab1dd..29a1450 100644 --- a/tests/cases/Encoding/TestUTF8.php +++ b/tests/cases/Encoding/TestUTF8.php @@ -28,8 +28,81 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + public function provideCodePoints() { + return [ + 'U+007A (HTML)' => [false, 0x7A, "7A"], + 'U+007A (fatal)' => [true, 0x7A, "7A"], + 'U+00A2 (HTML)' => [false, 0xA2, "C2 A2"], + 'U+00A2 (fatal)' => [true, 0xA2, "C2 A2"], + 'U+6C34 (HTML)' => [false, 0x6C34, "E6 B0 B4"], + 'U+6C34 (fatal)' => [true, 0x6C34, "E6 B0 B4"], + 'U+1D11E (HTML)' => [false, 0x1D11E, "F0 9D 84 9E"], + 'U+1D11E (fatal)' => [true, 0x1D11E, "F0 9D 84 9E"], + 'U+F8FF 
(HTML)' => [false, 0xF8FF, "EF A3 BF"], + 'U+F8FF (fatal)' => [true, 0xF8FF, "EF A3 BF"], + 'U+10FFFD (HTML)' => [false, 0x10FFFD, "F4 8F BF BD"], + 'U+10FFFD (fatal)' => [true, 0x10FFFD, "F4 8F BF BD"], + 'U+FFFE (HTML)' => [false, 0xFFFE, "EF BF BE"], + 'U+FFFE (fatal)' => [true, 0xFFFE, "EF BF BE"], + '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], + ]; + } + + public function provideStrings() { + return [ + // control samples + 'empty string' => ["", []], + 'sanity check' => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]], + 'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]], + 'mixed sample' => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]], + // various invalid sequences + 'invalid code' => ["FF", [65533]], + 'ends early' => ["C0", [65533]], + 'ends early 2' => ["E0", [65533]], + 'invalid trail' => ["C0 00", [65533, 0]], + 'invalid trail 2' => ["C0 C0", [65533, 65533]], + 'invalid trail 3' => ["E0 00", [65533, 0]], + 'invalid trail 4' => ["E0 C0", [65533, 65533]], + 'invalid trail 5' => ["E0 80 00", [65533, 65533, 0]], + 'invalid trail 6' => ["E0 80 C0", [65533, 65533, 65533]], + '> 0x10FFFF' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], + 'obsolete lead byte' => ["FE 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], + 'overlong U+0000 - 2 bytes' => ["C0 80", [65533, 65533]], + 'overlong U+0000 - 3 bytes' => ["E0 80 80", [65533, 65533, 65533]], + 'overlong U+0000 - 4 bytes' => ["F0 80 80 
80", [65533, 65533, 65533, 65533]], + 'overlong U+0000 - 5 bytes' => ["F8 80 80 80 80", [65533, 65533, 65533, 65533, 65533]], + 'overlong U+0000 - 6 bytes' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], + 'overlong U+007F - 2 bytes' => ["C1 BF", [65533, 65533]], + 'overlong U+007F - 3 bytes' => ["E0 81 BF", [65533, 65533, 65533]], + 'overlong U+007F - 4 bytes' => ["F0 80 81 BF", [65533, 65533, 65533, 65533]], + 'overlong U+007F - 5 bytes' => ["F8 80 80 81 BF", [65533, 65533, 65533, 65533, 65533]], + 'overlong U+007F - 6 bytes' => ["FC 80 80 80 81 BF", [65533, 65533, 65533, 65533, 65533, 65533]], + 'overlong U+07FF - 3 bytes' => ["E0 9F BF", [65533, 65533, 65533]], + 'overlong U+07FF - 4 bytes' => ["F0 80 9F BF", [65533, 65533, 65533, 65533]], + 'overlong U+07FF - 5 bytes' => ["F8 80 80 9F BF", [65533, 65533, 65533, 65533, 65533]], + 'overlong U+07FF - 6 bytes' => ["FC 80 80 80 9F BF", [65533, 65533, 65533, 65533, 65533, 65533]], + 'overlong U+FFFF - 4 bytes' => ["F0 8F BF BF", [65533, 65533, 65533, 65533]], + 'overlong U+FFFF - 5 bytes' => ["F8 80 8F BF BF", [65533, 65533, 65533, 65533, 65533]], + 'overlong U+FFFF - 6 bytes' => ["FC 80 80 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]], + 'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", [65533, 65533, 65533, 65533, 65533]], + 'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]], + // UTF-16 surrogates + // surrogates have alternate outputs for when surrogates are being allowed + 'lead surrogate' => ["ED A0 80", [65533, 65533, 65533], [0xD800]], + 'trail surrogate' => ["ED B0 80", [65533, 65533, 65533], [0xDC00]], + 'surrogate pair' => ["ED A0 80 ED B0 80", [65533, 65533, 65533, 65533, 65533, 65533], [0xD800, 0xDC00]], + // self-sync edge cases + 'trailing continuation' => ["0A 80 80", [10, 65533, 65533]], + 'trailing continuation 2' => ["E5 8F A4 80", [21476, 65533]], + ]; + } + /** * @dataProvider provideCodePoints + * @covers 
MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\UTF8::encode * @covers MensBeam\Intl\Encoding\UTF8::errEnc */ @@ -37,6 +110,15 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\UTF8::encode + * @covers MensBeam\Intl\Encoding\UTF8::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\UTF8::__construct @@ -142,76 +224,4 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { public function testSeekBackOverRandomData() { return parent::testSeekBackOverRandomData(); } - - public function provideCodePoints() { - return [ - 'U+007A (HTML)' => [false, 0x7A, "7A"], - 'U+007A (fatal)' => [true, 0x7A, "7A"], - 'U+00A2 (HTML)' => [false, 0xA2, "C2 A2"], - 'U+00A2 (fatal)' => [true, 0xA2, "C2 A2"], - 'U+6C34 (HTML)' => [false, 0x6C34, "E6 B0 B4"], - 'U+6C34 (fatal)' => [true, 0x6C34, "E6 B0 B4"], - 'U+1D11E (HTML)' => [false, 0x1D11E, "F0 9D 84 9E"], - 'U+1D11E (fatal)' => [true, 0x1D11E, "F0 9D 84 9E"], - 'U+F8FF (HTML)' => [false, 0xF8FF, "EF A3 BF"], - 'U+F8FF (fatal)' => [true, 0xF8FF, "EF A3 BF"], - 'U+10FFFD (HTML)' => [false, 0x10FFFD, "F4 8F BF BD"], - 'U+10FFFD (fatal)' => [true, 0x10FFFD, "F4 8F BF BD"], - 'U+FFFE (HTML)' => [false, 0xFFFE, "EF BF BE"], - 'U+FFFE (fatal)' => [true, 0xFFFE, "EF BF BE"], - '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)], - ]; - } - - public function 
provideStrings() { - return [ - // control samples - 'empty string' => ["", []], - 'sanity check' => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]], - 'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]], - 'mixed sample' => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]], - // various invalid sequences - 'invalid code' => ["FF", [65533]], - 'ends early' => ["C0", [65533]], - 'ends early 2' => ["E0", [65533]], - 'invalid trail' => ["C0 00", [65533, 0]], - 'invalid trail 2' => ["C0 C0", [65533, 65533]], - 'invalid trail 3' => ["E0 00", [65533, 0]], - 'invalid trail 4' => ["E0 C0", [65533, 65533]], - 'invalid trail 5' => ["E0 80 00", [65533, 65533, 0]], - 'invalid trail 6' => ["E0 80 C0", [65533, 65533, 65533]], - '> 0x10FFFF' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], - 'obsolete lead byte' => ["FE 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], - 'overlong U+0000 - 2 bytes' => ["C0 80", [65533, 65533]], - 'overlong U+0000 - 3 bytes' => ["E0 80 80", [65533, 65533, 65533]], - 'overlong U+0000 - 4 bytes' => ["F0 80 80 80", [65533, 65533, 65533, 65533]], - 'overlong U+0000 - 5 bytes' => ["F8 80 80 80 80", [65533, 65533, 65533, 65533, 65533]], - 'overlong U+0000 - 6 bytes' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]], - 'overlong U+007F - 2 bytes' => ["C1 BF", [65533, 65533]], - 'overlong U+007F - 3 bytes' => ["E0 81 BF", [65533, 65533, 65533]], - 'overlong U+007F - 4 bytes' => ["F0 80 81 BF", [65533, 65533, 65533, 65533]], - 'overlong U+007F - 5 bytes' => ["F8 80 80 81 BF", [65533, 65533, 65533, 65533, 65533]], - 'overlong U+007F - 6 bytes' => ["FC 80 80 80 81 BF", [65533, 65533, 65533, 65533, 65533, 65533]], - 'overlong U+07FF - 3 bytes' => ["E0 9F BF", [65533, 65533, 65533]], - 'overlong 
U+07FF - 4 bytes' => ["F0 80 9F BF", [65533, 65533, 65533, 65533]], - 'overlong U+07FF - 5 bytes' => ["F8 80 80 9F BF", [65533, 65533, 65533, 65533, 65533]], - 'overlong U+07FF - 6 bytes' => ["FC 80 80 80 9F BF", [65533, 65533, 65533, 65533, 65533, 65533]], - 'overlong U+FFFF - 4 bytes' => ["F0 8F BF BF", [65533, 65533, 65533, 65533]], - 'overlong U+FFFF - 5 bytes' => ["F8 80 8F BF BF", [65533, 65533, 65533, 65533, 65533]], - 'overlong U+FFFF - 6 bytes' => ["FC 80 80 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]], - 'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", [65533, 65533, 65533, 65533, 65533]], - 'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]], - // UTF-16 surrogates - // surrogates have alternate outputs for when surrogates are being allowed - 'lead surrogate' => ["ED A0 80", [65533, 65533, 65533], [0xD800]], - 'trail surrogate' => ["ED B0 80", [65533, 65533, 65533], [0xDC00]], - 'surrogate pair' => ["ED A0 80 ED B0 80", [65533, 65533, 65533, 65533, 65533, 65533], [0xD800, 0xDC00]], - // self-sync edge cases - 'trailing continuation' => ["0A 80 80", [10, 65533, 65533]], - 'trailing continuation 2' => ["E5 8F A4 80", [21476, 65533]], - ]; - } } diff --git a/tests/cases/Encoding/TestXUserDefined.php b/tests/cases/Encoding/TestXUserDefined.php index e9fb74c..1dcef72 100644 --- a/tests/cases/Encoding/TestXUserDefined.php +++ b/tests/cases/Encoding/TestXUserDefined.php @@ -60,6 +60,7 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\Encoder * @covers MensBeam\Intl\Encoding\XUserDefined::encode * @covers MensBeam\Intl\Encoding\XUserDefined::errEnc */ @@ -67,6 +68,15 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testEncodeCodePoints($fatal, $input, $exp); } + /** + * @dataProvider provideCodePoints + * @covers MensBeam\Intl\Encoding\XUserDefined::encode + * @covers 
MensBeam\Intl\Encoding\XUserDefined::errEnc + */ + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + return parent::testEncodeCodePointsStatically($fatal, $input, $exp); + } + /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\XUserDefined::__construct diff --git a/tests/lib/CoderDecoderTest.php b/tests/lib/CoderDecoderTest.php index 3bf4e8f..142d22c 100644 --- a/tests/lib/CoderDecoderTest.php +++ b/tests/lib/CoderDecoderTest.php @@ -6,9 +6,30 @@ declare(strict_types=1); namespace MensBeam\Intl\Test; +use \MensBeam\Intl\Encoding\Encoder; + abstract class CoderDecoderTest extends DecoderTest { public function testEncodeCodePoints(bool $fatal, $input, $exp) { $class = $this->testedClass; + $label = $class::NAME; + $e = new Encoder($label, $fatal); + $input = (array) $input; + if ($exp instanceof \Throwable) { + $this->expectException(get_class($exp)); + $this->expectExceptionCode($exp->getCode()); + } else { + $exp = strtolower(str_replace(" ", "", $exp)); + } + $out = $e->encode($input); + $this->assertSame($exp, bin2hex($out)); + } + + public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { + $class = $this->testedClass; + if (!method_exists($class, "encode")) { + $this->assertTrue(true); + return; + } if ($exp instanceof \Throwable) { $this->expectException(get_class($exp)); $this->expectExceptionCode($exp->getCode());