Browse Source

Tests for general encoder

multi-byte
J. King 4 years ago
parent
commit
10328b6806
  1. 43
      lib/Encoding/Encoder.php
  2. 94
      tests/cases/Encoding/TestBig5.php
  3. 118
      tests/cases/Encoding/TestEUCJP.php
  4. 74
      tests/cases/Encoding/TestEUCKR.php
  5. 233
      tests/cases/Encoding/TestGB18030.php
  6. 10
      tests/cases/Encoding/TestISO2022JP.php
  7. 100
      tests/cases/Encoding/TestShiftJIS.php
  8. 12
      tests/cases/Encoding/TestSingleByte.php
  9. 154
      tests/cases/Encoding/TestUTF8.php
  10. 10
      tests/cases/Encoding/TestXUserDefined.php
  11. 21
      tests/lib/CoderDecoderTest.php

43
lib/Encoding/Encoder.php

@ -22,14 +22,31 @@ class Encoder {
if (!$l || !$l['encoder']) {
throw new EncoderException("Label '$label' does not have an encoder", Encoder::E_UNAVAILABLE_ENCODER);
} else {
$this->name = $s['name'];
$this->name = $l['name'];
$this->fatal = $fatal;
}
}
public function encode(int $codePoint): string {
/**
 * Encodes a sequence of code points into one byte string.
 *
 * Every code point is passed to encodeChar() in iteration order and the
 * results are concatenated. The encoder's mode as it was on entry is put
 * back before returning, including when encodeChar() throws.
 *
 * @param iterable $codePoints The code points to encode, in order
 * @return string The concatenated output of encodeChar() for each code point
 */
public function encode(iterable $codePoints): string {
    // Remember the caller's mode so that this call leaves it untouched
    $savedMode = $this->mode;
    // reset() is defined outside this view; presumably it returns the encoder to its initial state
    $this->reset();
    $bytes = "";
    try {
        foreach ($codePoints as $cp) {
            $bytes .= $this->encodeChar($cp);
        }
        // For ISO-2022-JP, output which did not end in ASCII mode is terminated with the bytes ESC ( B
        $endedOutsideAscii = $this->name === "ISO-2022-JP" && $this->mode !== self::MODE_ASCII;
        if ($endedOutsideAscii) {
            $bytes .= "\x1B\x28\x42";
        }
    } finally {
        $this->mode = $savedMode;
    }
    return $bytes;
}
public function encodeChar(int $codePoint): string {
if ($codePoint < 0 || $codePoint > 0x10FFFF) {
throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", Encoding::E_INVALID_CODE_POINT);
}
switch ($this->name) {
case "UTF-8":
@ -80,23 +97,25 @@ class Encoder {
return Macintosh::encode($codePoint, $this->fatal);
case "Shift_JIS":
return ShiftJIS::encode($codePoint, $this->fatal);
case "windows1250":
case "windows-1250":
return Windows1250::encode($codePoint, $this->fatal);
case "windows1251":
case "windows-1251":
return Windows1251::encode($codePoint, $this->fatal);
case "windows1252":
case "windows-1252":
return Windows1252::encode($codePoint, $this->fatal);
case "windows1253":
case "windows-1253":
return Windows1253::encode($codePoint, $this->fatal);
case "windows1254":
case "windows-1254":
return Windows1254::encode($codePoint, $this->fatal);
case "windows1255":
case "windows-1255":
return Windows1255::encode($codePoint, $this->fatal);
case "windows1256":
case "windows-1256":
return Windows1256::encode($codePoint, $this->fatal);
case "windows1257":
case "windows-1257":
return Windows1257::encode($codePoint, $this->fatal);
case "windows874":
case "windows-1258":
return Windows1258::encode($codePoint, $this->fatal);
case "windows-874":
return Windows874::encode($codePoint, $this->fatal);
case "x-mac-cyrillic":
return XMacCyrillic::encode($codePoint, $this->fatal);

94
tests/cases/Encoding/TestBig5.php

@ -28,8 +28,51 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/**
 * Supplies single-code-point encoding cases for the Big5 tests.
 *
 * Each row is [$fatal, $input, $exp], matching the parameters of the test
 * methods which name this provider: $fatal is false for the "(HTML)" variant
 * of a case and true for the "(fatal)" variant, $input is the code point, and
 * $exp is either the expected bytes written as hexadecimal or an
 * EncoderException carrying the expected error code.
 */
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
// U+00CA: the HTML variant expects the numeric character reference "&#202;", the fatal variant an exception
// NOTE(review): bin2hex() yields unspaced lowercase hex, unlike the hand-written values; presumably the parent test normalises both forms - confirm
'U+00CA (HTML)' => [false, 0xCA, bin2hex("&#202;")],
'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3007 (HTML)' => [false, 0x3007, "C6 E2"],
'U+3007 (fatal)' => [true, 0x3007, "C6 E2"],
'U+5341 (HTML)' => [false, 0x5341, "A4 51"],
'U+5341 (fatal)' => [true, 0x5341, "A4 51"],
'U+2561 (HTML)' => [false, 0x2561, "F9 EB"],
'U+2561 (fatal)' => [true, 0x2561, "F9 EB"],
'U+256D (HTML)' => [false, 0x256D, "A2 7E"],
'U+256D (fatal)' => [true, 0x256D, "A2 7E"],
// Inputs below zero or above 0x10FFFF expect E_INVALID_CODE_POINT in both variants
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
/**
 * Supplies Big5 byte strings paired with lists of code points.
 *
 * Each row is [hexadecimal byte string, array of integer code points].
 * NOTE(review): the tests consuming this provider are outside this view;
 * presumably the second element is the expected result of decoding the first - confirm.
 * The value 65533 which recurs below is U+FFFD.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'two-byte character' => ["D7 D7", [36290]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
'0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
'0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
'invalid high byte as first byte' => ["81 D7 00", [65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'invalid high byte after first byte' => ["D7 81", [65533]],
'broken string' => ["00 FF 00", [0, 65533, 0]],
// In the two "double-characters" cases each two-byte sequence is paired with two code points
'double-characters low' => ["88 62 88 64", [202, 772, 202, 780]],
'double-characters high' => ["88 A3 88 A5", [234, 772, 234, 780]],
'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 34508, 27700, 202, 772, 234, 780]],
'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 36290, 36290, 98]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\Big5::encode
* @covers MensBeam\Intl\Encoding\Big5::errEnc
*/
@ -37,6 +80,15 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Thin override which forwards its arguments unchanged to the parent implementation.
 *
 * The override carries the data provider and coverage annotations for Big5.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\Big5::encode
 * @covers MensBeam\Intl\Encoding\Big5::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\Big5::__construct
@ -143,48 +195,6 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+00CA (HTML)' => [false, 0xCA, bin2hex("&#202;")],
'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3007 (HTML)' => [false, 0x3007, "C6 E2"],
'U+3007 (fatal)' => [true, 0x3007, "C6 E2"],
'U+5341 (HTML)' => [false, 0x5341, "A4 51"],
'U+5341 (fatal)' => [true, 0x5341, "A4 51"],
'U+2561 (HTML)' => [false, 0x2561, "F9 EB"],
'U+2561 (fatal)' => [true, 0x2561, "F9 EB"],
'U+256D (HTML)' => [false, 0x256D, "A2 7E"],
'U+256D (fatal)' => [true, 0x256D, "A2 7E"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'two-byte character' => ["D7 D7", [36290]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
'0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
'0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
'invalid high byte as first byte' => ["81 D7 00", [65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'invalid high byte after first byte' => ["D7 81", [65533]],
'broken string' => ["00 FF 00", [0, 65533, 0]],
'double-characters low' => ["88 62 88 64", [202, 772, 202, 780]],
'double-characters high' => ["88 A3 88 A5", [234, 772, 234, 780]],
'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 34508, 27700, 202, 772, 234, 780]],
'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 36290, 36290, 98]],
];
}
/**
* @group optional
*/

118
tests/cases/Encoding/TestEUCJP.php

@ -28,8 +28,63 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/**
 * Supplies single-code-point encoding cases for the EUC-JP tests.
 *
 * Each row is [$fatal, $input, $exp], matching the parameters of the test
 * methods which name this provider: $fatal is false for the "(HTML)" variant
 * of a case and true for the "(fatal)" variant, $input is the code point, and
 * $exp is either the expected bytes written as hexadecimal or an
 * EncoderException carrying the expected error code.
 */
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
// U+00A5 and U+203E are each expected to produce a single byte (5C and 7E respectively)
'U+00A5 (HTML)' => [false, 0xA5, "5C"],
'U+00A5 (fatal)' => [true, 0xA5, "5C"],
'U+203E (HTML)' => [false, 0x203E, "7E"],
'U+203E (fatal)' => [true, 0x203E, "7E"],
'U+3088 (HTML)' => [false, 0x3088, "A4 E8"],
'U+3088 (fatal)' => [true, 0x3088, "A4 E8"],
'U+FF96 (HTML)' => [false, 0xFF96, "8E D6"],
'U+FF96 (fatal)' => [true, 0xFF96, "8E D6"],
// U+2212 expects A1 DD, the same bytes which the 'changed multibyte index' string case pairs with 65293 (U+FF0D)
'U+2212 (HTML)' => [false, 0x2212, "A1 DD"],
'U+2212 (fatal)' => [true, 0x2212, "A1 DD"],
// U+00E6: the HTML variant expects the numeric character reference "&#230;", the fatal variant an exception
'U+00E6 (HTML)' => [false, 0xE6, bin2hex("&#230;")],
'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+FFE2 (HTML)' => [false, 0xFFE2, "A2 CC"],
'U+FFE2 (fatal)' => [true, 0xFFE2, "A2 CC"],
'U+2116 (HTML)' => [false, 0x2116, "AD E2"],
'U+2116 (fatal)' => [true, 0x2116, "AD E2"],
// Inputs below zero or above 0x10FFFF expect E_INVALID_CODE_POINT in both variants
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
/**
 * Supplies EUC-JP byte strings paired with lists of code points.
 *
 * Each row is [hexadecimal byte string, array of integer code points]; the
 * hexadecimal is written both with and without spaces between bytes.
 * NOTE(review): the tests consuming this provider are outside this view;
 * presumably the second element is the expected result of decoding the first - confirm.
 * The value 65533 which recurs below is U+FFFD.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'former ASCII deviations' => ["5C 7E", [92, 126]],
'changed multibyte index' => ["A1DD", [65293]],
// Sequences introduced by the byte 8E
'JIS X 0201 range' => ["8EA1 8EDF", [65377, 65439]],
'JIS X 0201 bogus range' => ["8EA0 8EE0", [65533, 65533]],
'JIS X 0201 truncated character 1' => ["8E", [65533]],
'JIS X 0201 truncated character 2' => ["8E 20", [65533, 32]],
'JIS X 0201 truncated character 3' => ["8E FF", [65533]],
// Sequences introduced by the byte 8F
'JIS X 0212 assigned range' => ["8FA2AF 8FEDE3", [728, 40869]],
'JIS X 0212 total range' => ["8FA1A1 8FFEFE", [65533, 65533]],
'JIS X 0212 bogus range 1' => ["8FA0A1 8FFFFE", [65533, 65533, 65533, 65533]],
'JIS X 0212 bogus range 2' => ["8FA1A0 8FFEFF", [65533, 65533]],
'JIS X 0212 truncated character 1' => ["8FA2", [65533]],
'JIS X 0212 truncated character 2' => ["8FA2 20", [65533, 32]],
'JIS X 0212 truncated character 3' => ["8FA2 FF", [65533]],
// Two-byte sequences without an introducing byte
'JIS X 0208 assigned range' => ["A1A1 FCFE", [12288, 65282]],
'JIS X 0208 total range' => ["A1A1 FEFE", [12288, 65533]],
'JIS X 0208 bogus range' => ["A1A0 A0FE", [65533, 65533, 65533]],
'JIS X 0208 truncated character 1' => ["A1", [65533]],
'JIS X 0208 truncated character 2' => ["A1 20", [65533, 32]],
'JIS X 0208 truncated character 3' => ["A1 FF", [65533]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\EUCJP::encode
* @covers MensBeam\Intl\Encoding\EUCJP::errEnc
*/
@ -37,6 +92,15 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Thin override which forwards its arguments unchanged to the parent implementation.
 *
 * The override carries the data provider and coverage annotations for EUC-JP.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\EUCJP::encode
 * @covers MensBeam\Intl\Encoding\EUCJP::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\EUCJP::__construct
@ -142,60 +206,6 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+00A5 (HTML)' => [false, 0xA5, "5C"],
'U+00A5 (fatal)' => [true, 0xA5, "5C"],
'U+203E (HTML)' => [false, 0x203E, "7E"],
'U+203E (fatal)' => [true, 0x203E, "7E"],
'U+3088 (HTML)' => [false, 0x3088, "A4 E8"],
'U+3088 (fatal)' => [true, 0x3088, "A4 E8"],
'U+FF96 (HTML)' => [false, 0xFF96, "8E D6"],
'U+FF96 (fatal)' => [true, 0xFF96, "8E D6"],
'U+2212 (HTML)' => [false, 0x2212, "A1 DD"],
'U+2212 (fatal)' => [true, 0x2212, "A1 DD"],
'U+00E6 (HTML)' => [false, 0xE6, bin2hex("&#230;")],
'U+00E6 (fatal)' => [true, 0xE6, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+FFE2 (HTML)' => [false, 0xFFE2, "A2 CC"],
'U+FFE2 (fatal)' => [true, 0xFFE2, "A2 CC"],
'U+2116 (HTML)' => [false, 0x2116, "AD E2"],
'U+2116 (fatal)' => [true, 0x2116, "AD E2"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'former ASCII deviations' => ["5C 7E", [92, 126]],
'changed multibyte index' => ["A1DD", [65293]],
'JIS X 0201 range' => ["8EA1 8EDF", [65377, 65439]],
'JIS X 0201 bogus range' => ["8EA0 8EE0", [65533, 65533]],
'JIS X 0201 truncated character 1' => ["8E", [65533]],
'JIS X 0201 truncated character 2' => ["8E 20", [65533, 32]],
'JIS X 0201 truncated character 3' => ["8E FF", [65533]],
'JIS X 0212 assigned range' => ["8FA2AF 8FEDE3", [728, 40869]],
'JIS X 0212 total range' => ["8FA1A1 8FFEFE", [65533, 65533]],
'JIS X 0212 bogus range 1' => ["8FA0A1 8FFFFE", [65533, 65533, 65533, 65533]],
'JIS X 0212 bogus range 2' => ["8FA1A0 8FFEFF", [65533, 65533]],
'JIS X 0212 truncated character 1' => ["8FA2", [65533]],
'JIS X 0212 truncated character 2' => ["8FA2 20", [65533, 32]],
'JIS X 0212 truncated character 3' => ["8FA2 FF", [65533]],
'JIS X 0208 assigned range' => ["A1A1 FCFE", [12288, 65282]],
'JIS X 0208 total range' => ["A1A1 FEFE", [12288, 65533]],
'JIS X 0208 bogus range' => ["A1A0 A0FE", [65533, 65533, 65533]],
'JIS X 0208 truncated character 1' => ["A1", [65533]],
'JIS X 0208 truncated character 2' => ["A1 20", [65533, 32]],
'JIS X 0208 truncated character 3' => ["A1 FF", [65533]],
];
}
/**
* @group optional
*/

74
tests/cases/Encoding/TestEUCKR.php

@ -28,8 +28,41 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/**
 * Supplies single-code-point encoding cases for the EUC-KR tests.
 *
 * Each row is [$fatal, $input, $exp], matching the parameters of the test
 * methods which name this provider: $fatal is false for the "(HTML)" variant
 * of a case and true for the "(fatal)" variant, $input is the code point, and
 * $exp is either the expected bytes written as hexadecimal or an
 * EncoderException carrying the expected error code.
 */
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
// U+00CA: the HTML variant expects the numeric character reference "&#202;", the fatal variant an exception
'U+00CA (HTML)' => [false, 0xCA, bin2hex("&#202;")],
'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+ACF2 (HTML)' => [false, 0xACF2, "81 E9"],
'U+ACF2 (fatal)' => [true, 0xACF2, "81 E9"],
// Inputs below zero or above 0x10FFFF expect E_INVALID_CODE_POINT in both variants
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
/**
 * Supplies EUC-KR byte strings paired with lists of code points.
 *
 * Each row is [hexadecimal byte string, array of integer code points].
 * NOTE(review): the tests consuming this provider are outside this view;
 * presumably the second element is the expected result of decoding the first - confirm.
 * The value 65533 which recurs below is U+FFFD.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'two-byte character' => ["D7 D7", [21033]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
'0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
'0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'non-character' => ["A5 DC", [65533]],
// Same input bytes as the Big5 'mixed string' case, paired here with different code points
'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]],
'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\EUCKR::encode
* @covers MensBeam\Intl\Encoding\EUCKR::errEnc
*/
@ -37,6 +70,15 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Thin override which forwards its arguments unchanged to the parent implementation.
 *
 * The override carries the data provider and coverage annotations for EUC-KR.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\EUCKR::encode
 * @covers MensBeam\Intl\Encoding\EUCKR::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\EUCKR::__construct
@ -143,38 +185,6 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
public function provideCodePoints() {
return [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+00CA (HTML)' => [false, 0xCA, bin2hex("&#202;")],
'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+ACF2 (HTML)' => [false, 0xACF2, "81 E9"],
'U+ACF2 (fatal)' => [true, 0xACF2, "81 E9"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'two-byte character' => ["D7 D7", [21033]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
'0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
'0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'non-character' => ["A5 DC", [65533]],
'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]],
'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]],
];
}
/**
* @group optional
*/

233
tests/cases/Encoding/TestGB18030.php

@ -33,8 +33,119 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
$this->testedClass = GB18030::class;
}
/**
 * Supplies single-code-point encoding cases for both gb18030 and GBK.
 *
 * Two tables of [$fatal, $input, $exp] rows are defined, one per encoding.
 * Each row is then yielded with the class name of the encoding appended as a
 * fourth element, under a data set name prefixed with "gb18030 " or "GBK ".
 * $exp is either the expected bytes written as hexadecimal or an
 * EncoderException carrying the expected error code.
 */
public function provideCodePoints() {
// bytes confirmed using Firefox
$series_gb18030 = [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+20AC (HTML)' => [false, 0x20AC, "A2 E3"],
'U+20AC (fatal)' => [true, 0x20AC, "A2 E3"],
'U+2164 (HTML)' => [false, 0x2164, "A2 F5"],
'U+2164 (fatal)' => [true, 0x2164, "A2 F5"],
'U+3A74 (HTML)' => [false, 0x3A74, "82 31 97 30"],
'U+3A74 (fatal)' => [true, 0x3A74, "82 31 97 30"],
'U+E7C7 (HTML)' => [false, 0xE7C7, "81 35 F4 37"],
'U+E7C7 (fatal)' => [true, 0xE7C7, "81 35 F4 37"],
'U+1D11E (HTML)' => [false, 0x1D11E, "94 32 BE 34"],
'U+1D11E (fatal)' => [true, 0x1D11E, "94 32 BE 34"],
// U+E5E5 is the one in-range code point in this table expected to be unencodable
'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("&#58853;")],
'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3000 (HTML)' => [false, 0x3000, "A1 A1"],
'U+3000 (fatal)' => [true, 0x3000, "A1 A1"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
// Same inputs as above; the expectations differ for U+20AC (single byte 80) and for the
// three code points with four-byte expectations above, which are expected to be unencodable here
$series_gbk = [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+20AC (HTML)' => [false, 0x20AC, "80"],
'U+20AC (fatal)' => [true, 0x20AC, "80"],
'U+2164 (HTML)' => [false, 0x2164, "A2 F5"],
'U+2164 (fatal)' => [true, 0x2164, "A2 F5"],
'U+3A74 (HTML)' => [false, 0x3A74, bin2hex("&#14964;")],
'U+3A74 (fatal)' => [true, 0x3A74, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+E7C7 (HTML)' => [false, 0xE7C7, bin2hex("&#59335;")],
'U+E7C7 (fatal)' => [true, 0xE7C7, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+1D11E (HTML)' => [false, 0x1D11E, bin2hex("&#119070;")],
'U+1D11E (fatal)' => [true, 0x1D11E, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("&#58853;")],
'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3000 (HTML)' => [false, 0x3000, "A1 A1"],
'U+3000 (fatal)' => [true, 0x3000, "A1 A1"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
// $test is a per-iteration copy, so appending the class name does not alter the tables above
foreach ($series_gb18030 as $name => $test) {
array_push($test, GB18030::class);
yield "gb18030 $name" => $test;
}
foreach ($series_gbk as $name => $test) {
array_push($test, GBK::class);
yield "GBK $name" => $test;
}
}
/**
 * Supplies gb18030 byte strings paired with lists of code points.
 *
 * Each row is [hexadecimal byte string, array of integer code points].
 * NOTE(review): the tests consuming this provider are outside this view;
 * presumably the second element is the expected result of decoding the first - confirm.
 * The value 65533 which recurs below is U+FFFD. Cases marked "(padded)" repeat
 * an earlier input with 00 bytes added after it, or on both sides of it.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
// valid single characters
'sanity check' => ["40", [64]],
'special case for 0x80' => ["80", [8364]],
'four-byte special case' => ["81 35 F4 37", [59335]],
'two-byte character' => ["A8 4E", [8735]],
'four-byte character' => ["82 31 A2 37", [15081]],
// cut sequences
'EOF after first byte' => ["82", [65533]],
'EOF after second byte' => ["82 30", [65533]],
'EOF after third byte' => ["82 30 81", [65533]],
// invalid sequences
'bad first byte' => ["FF 35 F4 37", [65533, 53, 65533]],
'bad second byte' => ["81 FF F4 37", [65533, 65533]],
'bad third byte' => ["81 35 FF 37", [65533, 53, 65533, 55]],
'bad fourth byte' => ["81 35 F4 FF", [65533, 53, 65533]],
'control first byte' => ["00 35 F4 37", [0, 53, 65533]],
'control second byte' => ["81 00 F4 37", [65533, 0, 65533]],
'control third byte' => ["81 35 00 37", [65533, 53, 0, 55]],
'control fourth byte' => ["81 35 F4 00", [65533, 53, 65533, 0]],
// invalid sequences with clean EOF
'bad first byte (padded)' => ["FF 35 F4 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]],
'bad second byte (padded)' => ["81 FF F4 37 00 00 00 00", [65533, 65533, 55, 0, 0, 0, 0]],
'bad third byte (padded)' => ["81 35 FF 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]],
'bad fourth byte (padded)' => ["81 35 F4 FF 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0]],
'control first byte (padded)' => ["00 35 F4 37 00 00 00 00", [0, 53, 65533, 55, 0, 0, 0, 0]],
'control second byte (padded)' => ["81 00 F4 37 00 00 00 00", [65533, 0, 65533, 55, 0, 0, 0, 0]],
'control third byte (padded)' => ["81 35 00 37 00 00 00 00", [65533, 53, 0, 55, 0, 0, 0, 0]],
'control fourth byte (padded)' => ["81 35 F4 00 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0, 0]],
// out-of-range sequences
'void sequence' => ["84 32 A4 39", [65533]],
'void sequence 2' => ["FE 39 FE 39", [65533]],
// backward seeking tests
'seek test 1' => ["81 81 81 30", [20118, 65533]],
'seek test 2' => ["81 81 80", [20118, 8364]],
'seek test 3' => ["81 81 00", [20118, 0]],
'seek test 4' => ["81 81 81 00", [20118, 65533, 0]],
'seek test 5' => ["81 30 30 30", [65533, 48, 48, 48]],
'seek test 6' => ["81 30 81 81", [65533, 48, 20118]],
'seek test 7' => ["30 30 81 81", [48, 48, 20118]],
'seek test 8' => ["F8 83 FE 80", [40229, 18211]],
'seek test 1 (padded)' => ["00 00 00 00 81 81 81 30 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 48, 0, 0, 0, 0]],
'seek test 2 (padded)' => ["00 00 00 00 81 81 80 00 00 00 00", [0, 0, 0, 0, 20118, 8364, 0, 0, 0, 0]],
'seek test 3 (padded)' => ["00 00 00 00 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 0, 0, 0, 0, 0]],
'seek test 4 (padded)' => ["00 00 00 00 81 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 0, 0, 0, 0, 0]],
'seek test 5 (padded)' => ["00 00 00 00 81 30 30 30 00 00 00 00", [0, 0, 0, 0, 65533, 48, 48, 48, 0, 0, 0, 0]],
'seek test 6 (padded)' => ["00 00 00 00 81 30 81 81 00 00 00 00", [0, 0, 0, 0, 65533, 48, 20118, 0, 0, 0, 0]],
'seek test 7 (padded)' => ["00 00 00 00 30 30 81 81 00 00 00 00", [0, 0, 0, 0, 48, 48, 20118, 0, 0, 0, 0]],
'seek test 8 (padded)' => ["00 00 00 00 F8 83 FE 80 00 00 00 00", [0, 0, 0, 0, 40229, 18211, 0, 0, 0, 0]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\GB18030::encode
* @covers MensBeam\Intl\Encoding\GB18030::errEnc
* @covers MensBeam\Intl\Encoding\GBK::encode
@ -45,6 +156,18 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Selects the encoding class under test, then forwards to the parent implementation.
 *
 * The data provider appends either the GB18030 or the GBK class name to every
 * row, which arrives here as $class and is stored in $this->testedClass before
 * the parent test runs.
 *
 * The default for $class is the GB18030 encoding class. It was previously
 * self::class, which resolves to this test class rather than to an encoding,
 * so a call without the fourth argument would have pointed the parent test at
 * the wrong class.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\GB18030::encode
 * @covers MensBeam\Intl\Encoding\GB18030::errEnc
 * @covers MensBeam\Intl\Encoding\GBK::encode
 * @covers MensBeam\Intl\Encoding\GBK::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp, $class = GB18030::class) {
    $this->testedClass = $class;
    return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\GB18030::__construct
@ -151,116 +274,6 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
public function provideCodePoints() {
// bytes confirmed using Firefox
$series_gb18030 = [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+20AC (HTML)' => [false, 0x20AC, "A2 E3"],
'U+20AC (fatal)' => [true, 0x20AC, "A2 E3"],
'U+2164 (HTML)' => [false, 0x2164, "A2 F5"],
'U+2164 (fatal)' => [true, 0x2164, "A2 F5"],
'U+3A74 (HTML)' => [false, 0x3A74, "82 31 97 30"],
'U+3A74 (fatal)' => [true, 0x3A74, "82 31 97 30"],
'U+E7C7 (HTML)' => [false, 0xE7C7, "81 35 F4 37"],
'U+E7C7 (fatal)' => [true, 0xE7C7, "81 35 F4 37"],
'U+1D11E (HTML)' => [false, 0x1D11E, "94 32 BE 34"],
'U+1D11E (fatal)' => [true, 0x1D11E, "94 32 BE 34"],
'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("&#58853;")],
'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3000 (HTML)' => [false, 0x3000, "A1 A1"],
'U+3000 (fatal)' => [true, 0x3000, "A1 A1"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
$series_gbk = [
'U+0064 (HTML)' => [false, 0x64, "64"],
'U+0064 (fatal)' => [true, 0x64, "64"],
'U+20AC (HTML)' => [false, 0x20AC, "80"],
'U+20AC (fatal)' => [true, 0x20AC, "80"],
'U+2164 (HTML)' => [false, 0x2164, "A2 F5"],
'U+2164 (fatal)' => [true, 0x2164, "A2 F5"],
'U+3A74 (HTML)' => [false, 0x3A74, bin2hex("&#14964;")],
'U+3A74 (fatal)' => [true, 0x3A74, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+E7C7 (HTML)' => [false, 0xE7C7, bin2hex("&#59335;")],
'U+E7C7 (fatal)' => [true, 0xE7C7, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+1D11E (HTML)' => [false, 0x1D11E, bin2hex("&#119070;")],
'U+1D11E (fatal)' => [true, 0x1D11E, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+E5E5 (HTML)' => [false, 0xE5E5, bin2hex("&#58853;")],
'U+E5E5 (fatal)' => [true, 0xE5E5, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+3000 (HTML)' => [false, 0x3000, "A1 A1"],
'U+3000 (fatal)' => [true, 0x3000, "A1 A1"],
'-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
foreach ($series_gb18030 as $name => $test) {
array_push($test, GB18030::class);
yield "gb18030 $name" => $test;
}
foreach ($series_gbk as $name => $test) {
array_push($test, GBK::class);
yield "GBK $name" => $test;
}
}
public function provideStrings() {
return [
'empty string' => ["", []],
// valid single characters
'sanity check' => ["40", [64]],
'special case for 0x80' => ["80", [8364]],
'four-byte special case' => ["81 35 F4 37", [59335]],
'two-byte character' => ["A8 4E", [8735]],
'four-byte character' => ["82 31 A2 37", [15081]],
// cut sequences
'EOF after first byte' => ["82", [65533]],
'EOF after second byte' => ["82 30", [65533]],
'EOF after third byte' => ["82 30 81", [65533]],
// invalid sequences
'bad first byte' => ["FF 35 F4 37", [65533, 53, 65533]],
'bad second byte' => ["81 FF F4 37", [65533, 65533]],
'bad third byte' => ["81 35 FF 37", [65533, 53, 65533, 55]],
'bad fourth byte' => ["81 35 F4 FF", [65533, 53, 65533]],
'control first byte' => ["00 35 F4 37", [0, 53, 65533]],
'control second byte' => ["81 00 F4 37", [65533, 0, 65533]],
'control third byte' => ["81 35 00 37", [65533, 53, 0, 55]],
'control fourth byte' => ["81 35 F4 00", [65533, 53, 65533, 0]],
// invalid sequences with clean EOF
'bad first byte (padded)' => ["FF 35 F4 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]],
'bad second byte (padded)' => ["81 FF F4 37 00 00 00 00", [65533, 65533, 55, 0, 0, 0, 0]],
'bad third byte (padded)' => ["81 35 FF 37 00 00 00 00", [65533, 53, 65533, 55, 0, 0, 0, 0]],
'bad fourth byte (padded)' => ["81 35 F4 FF 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0]],
'control first byte (padded)' => ["00 35 F4 37 00 00 00 00", [0, 53, 65533, 55, 0, 0, 0, 0]],
'control second byte (padded)' => ["81 00 F4 37 00 00 00 00", [65533, 0, 65533, 55, 0, 0, 0, 0]],
'control third byte (padded)' => ["81 35 00 37 00 00 00 00", [65533, 53, 0, 55, 0, 0, 0, 0]],
'control fourth byte (padded)' => ["81 35 F4 00 00 00 00 00", [65533, 53, 65533, 0, 0, 0, 0, 0]],
// out-of-range sequences
'void sequence' => ["84 32 A4 39", [65533]],
'void sequence 2' => ["FE 39 FE 39", [65533]],
// backward seeking tests
'seek test 1' => ["81 81 81 30", [20118, 65533]],
'seek test 2' => ["81 81 80", [20118, 8364]],
'seek test 3' => ["81 81 00", [20118, 0]],
'seek test 4' => ["81 81 81 00", [20118, 65533, 0]],
'seek test 5' => ["81 30 30 30", [65533, 48, 48, 48]],
'seek test 6' => ["81 30 81 81", [65533, 48, 20118]],
'seek test 7' => ["30 30 81 81", [48, 48, 20118]],
'seek test 8' => ["F8 83 FE 80", [40229, 18211]],
'seek test 1 (padded)' => ["00 00 00 00 81 81 81 30 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 48, 0, 0, 0, 0]],
'seek test 2 (padded)' => ["00 00 00 00 81 81 80 00 00 00 00", [0, 0, 0, 0, 20118, 8364, 0, 0, 0, 0]],
'seek test 3 (padded)' => ["00 00 00 00 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 0, 0, 0, 0, 0]],
'seek test 4 (padded)' => ["00 00 00 00 81 81 81 00 00 00 00 00", [0, 0, 0, 0, 20118, 65533, 0, 0, 0, 0, 0]],
'seek test 5 (padded)' => ["00 00 00 00 81 30 30 30 00 00 00 00", [0, 0, 0, 0, 65533, 48, 48, 48, 0, 0, 0, 0]],
'seek test 6 (padded)' => ["00 00 00 00 81 30 81 81 00 00 00 00", [0, 0, 0, 0, 65533, 48, 20118, 0, 0, 0, 0]],
'seek test 7 (padded)' => ["00 00 00 00 30 30 81 81 00 00 00 00", [0, 0, 0, 0, 48, 48, 20118, 0, 0, 0, 0]],
'seek test 8 (padded)' => ["00 00 00 00 F8 83 FE 80 00 00 00 00", [0, 0, 0, 0, 40229, 18211, 0, 0, 0, 0]],
];
}
/**
* @group optional
*/

10
tests/cases/Encoding/TestISO2022JP.php

@ -60,6 +60,7 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\ISO2022JP::encode
* @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc
*/
@ -67,6 +68,15 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Thin override which forwards its arguments unchanged to the parent implementation.
 *
 * The override carries the data provider and coverage annotations for ISO-2022-JP.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\ISO2022JP::encode
 * @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\ISO2022JP::__construct

100
tests/cases/Encoding/TestShiftJIS.php

@ -28,8 +28,54 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/**
 * Data provider of code points and the result expected from encoding them.
 *
 * Each code point is listed once and expanded into two data sets, keyed
 * "<label> (HTML)" and "<label> (fatal)". A data set has the form
 * [fatal flag, code point, expected result], where the expected result is
 * either a hexadecimal string or an EncoderException carrying the expected code.
 */
public function provideCodePoints() {
    // [label, code point, expected result when fatal is false, expected result when fatal is true]
    $table = [
        ['U+0064',   0x64,     "64",                    "64"],
        ['U+00A5',   0xA5,     "5C",                    "5C"],
        ['U+203E',   0x203E,   "7E",                    "7E"],
        ['U+3088',   0x3088,   "82 E6",                 "82 E6"],
        ['U+FF96',   0xFF96,   "D6",                    "D6"],
        ['U+2212',   0x2212,   "81 7C",                 "81 7C"],
        ['U+00E6',   0xE6,     bin2hex("&#230;"),       new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
        ['U+FFE2',   0xFFE2,   "81 CA",                 "81 CA"],
        ['U+2116',   0x2116,   "87 82",                 "87 82"],
        ['U+E000',   0xE000,   bin2hex("&#57344;"),     new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
        ['-1',       -1,       new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        ['0x110000', 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
    ];
    $sets = [];
    foreach ($table as [$label, $codePoint, $htmlResult, $fatalResult]) {
        // the non-fatal set always precedes the fatal one
        $sets[$label." (HTML)"] = [false, $codePoint, $htmlResult];
        $sets[$label." (fatal)"] = [true, $codePoint, $fatalResult];
    }
    return $sets;
}
/**
 * Data provider of byte strings paired with code points.
 *
 * Each data set has the form [bytes written as hexadecimal text, list of
 * code points]. 65533 (U+FFFD) is the value listed wherever the label
 * describes the input as invalid or truncated.
 * NOTE(review): presumably consumed by the inherited decoder tests through
 * "@dataProvider provideStrings"; confirm against the parent test class.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'invalid byte' => ["FF", [65533]],
'former ASCII deviations' => ["5C 7E", [92, 126]],
'JIS X 0201 range' => ["A1 DF", [65377, 65439]],
// the two-byte sequences below are written without a space between lead and trail byte
// NOTE(review): presumably the consumer ignores whitespace in the hexadecimal text; verify
'EUDC range' => ["F040 F9FC", [57344, 59223]],
'JIS X 0208 assigned range' => ["8140 FC4B", [12288, 40657]],
'JIS X 0208 total range' => ["8140 FCFC", [12288, 65533]],
// a lone lead byte yields a single 65533
'JIS X 0208 truncated character 1' => ["81", [65533]],
// the 0x20 following the lead byte is still expected in the output as code point 32
'JIS X 0208 truncated character 2' => ["81 20", [65533, 32]],
// here both bytes together account for one 65533
'JIS X 0208 truncated character 3' => ["81 FF", [65533]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\ShiftJIS::encode
* @covers MensBeam\Intl\Encoding\ShiftJIS::errEnc
*/
@ -37,6 +83,15 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Re-declares the inherited testEncodeCodePointsStatically so that the data
 * provider and coverage annotations below are attached to it for this class.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\ShiftJIS::encode
 * @covers MensBeam\Intl\Encoding\ShiftJIS::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
    // pure delegation: the arguments are handed to the parent implementation unchanged
    $result = parent::testEncodeCodePointsStatically($fatal, $input, $exp);
    return $result;
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\ShiftJIS::__construct
@ -143,51 +198,6 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
 * Data provider of code points and the result expected from encoding them.
 *
 * Each code point is listed once and expanded into two data sets, keyed
 * "<label> (HTML)" and "<label> (fatal)". A data set has the form
 * [fatal flag, code point, expected result], where the expected result is
 * either a hexadecimal string or an EncoderException carrying the expected code.
 */
public function provideCodePoints() {
    // [label, code point, expected result when fatal is false, expected result when fatal is true]
    $table = [
        ['U+0064',   0x64,     "64",                    "64"],
        ['U+00A5',   0xA5,     "5C",                    "5C"],
        ['U+203E',   0x203E,   "7E",                    "7E"],
        ['U+3088',   0x3088,   "82 E6",                 "82 E6"],
        ['U+FF96',   0xFF96,   "D6",                    "D6"],
        ['U+2212',   0x2212,   "81 7C",                 "81 7C"],
        ['U+00E6',   0xE6,     bin2hex("&#230;"),       new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
        ['U+FFE2',   0xFFE2,   "81 CA",                 "81 CA"],
        ['U+2116',   0x2116,   "87 82",                 "87 82"],
        ['U+E000',   0xE000,   bin2hex("&#57344;"),     new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
        ['-1',       -1,       new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        ['0x110000', 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
    ];
    $sets = [];
    foreach ($table as [$label, $codePoint, $htmlResult, $fatalResult]) {
        // the non-fatal set always precedes the fatal one
        $sets[$label." (HTML)"] = [false, $codePoint, $htmlResult];
        $sets[$label." (fatal)"] = [true, $codePoint, $fatalResult];
    }
    return $sets;
}
/**
 * Data provider of byte strings paired with code points.
 *
 * Each data set has the form [bytes written as hexadecimal text, list of
 * code points]. 65533 (U+FFFD) is the value listed wherever the label
 * describes the input as invalid or truncated.
 * NOTE(review): presumably consumed by the inherited decoder tests through
 * "@dataProvider provideStrings"; confirm against the parent test class.
 */
public function provideStrings() {
return [
'empty string' => ["", []],
'sanity check' => ["40", [64]],
'invalid byte' => ["FF", [65533]],
'former ASCII deviations' => ["5C 7E", [92, 126]],
'JIS X 0201 range' => ["A1 DF", [65377, 65439]],
// the two-byte sequences below are written without a space between lead and trail byte
// NOTE(review): presumably the consumer ignores whitespace in the hexadecimal text; verify
'EUDC range' => ["F040 F9FC", [57344, 59223]],
'JIS X 0208 assigned range' => ["8140 FC4B", [12288, 40657]],
'JIS X 0208 total range' => ["8140 FCFC", [12288, 65533]],
// a lone lead byte yields a single 65533
'JIS X 0208 truncated character 1' => ["81", [65533]],
// the 0x20 following the lead byte is still expected in the output as code point 32
'JIS X 0208 truncated character 2' => ["81 20", [65533, 32]],
// here both bytes together account for one 65533
'JIS X 0208 truncated character 3' => ["81 FF", [65533]],
];
}
/**
* @group optional
*/

12
tests/cases/Encoding/TestSingleByte.php

@ -8,6 +8,7 @@ namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\SingleByteEncoding;
use MensBeam\Intl\Encoding\EncoderException;
use MensBeam\Intl\Encoding\Encoder;
class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
// maps taken from https://github.com/web-platform-tests/wpt/blob/d6c29bef8d4bcdfe4f689defca73360b07647d71/encoding/single-byte-decoder.html
@ -83,9 +84,20 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
 * Encodes the supplied code points with a general Encoder created for the
 * given encoding class and compares the result with the expected bytes.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\Encoder
 * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode
 */
public function testEncodeCodePoints(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) {
    // the class's NAME constant is the label handed to the Encoder
    $encoded = (new Encoder($class::NAME, $fatal))->encode($input);
    // both sides are compared in hexadecimal form
    $this->assertSame(bin2hex($exp), bin2hex($encoded));
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode
*/
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) {
$out = "";
foreach ($input as $code) {
$out .= $class::encode($code, $fatal);

154
tests/cases/Encoding/TestUTF8.php

@ -28,8 +28,81 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/**
 * Data provider of code points and the result expected from encoding them.
 *
 * Each code point is listed once and expanded into two data sets, keyed
 * "<label> (HTML)" and "<label> (fatal)". A data set has the form
 * [fatal flag, code point, expected result], where the expected result is
 * either a hexadecimal string or an EncoderException carrying the expected code.
 */
public function provideCodePoints() {
    // [label, code point, expected result when fatal is false, expected result when fatal is true]
    $table = [
        ['U+007A',   0x7A,     "7A",          "7A"],
        ['U+00A2',   0xA2,     "C2 A2",       "C2 A2"],
        ['U+6C34',   0x6C34,   "E6 B0 B4",    "E6 B0 B4"],
        ['U+1D11E',  0x1D11E,  "F0 9D 84 9E", "F0 9D 84 9E"],
        ['U+F8FF',   0xF8FF,   "EF A3 BF",    "EF A3 BF"],
        ['U+10FFFD', 0x10FFFD, "F4 8F BF BD", "F4 8F BF BD"],
        ['U+FFFE',   0xFFFE,   "EF BF BE",    "EF BF BE"],
        ['-1',       -1,       new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        ['0x110000', 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
    ];
    $sets = [];
    foreach ($table as [$label, $codePoint, $htmlResult, $fatalResult]) {
        // the non-fatal set always precedes the fatal one
        $sets[$label." (HTML)"] = [false, $codePoint, $htmlResult];
        $sets[$label." (fatal)"] = [true, $codePoint, $fatalResult];
    }
    return $sets;
}
/**
 * Data provider of byte strings paired with code points.
 *
 * Each data set has the form [bytes written as hexadecimal text, list of
 * code points]; the surrogate sets carry a third element with alternate
 * code points. 65533 (U+FFFD) is the value listed for every byte of the
 * sequences labelled as invalid.
 * NOTE(review): presumably consumed by the inherited decoder tests through
 * "@dataProvider provideStrings"; confirm against the parent test class.
 */
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]],
'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
'mixed sample' => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
// various invalid sequences
'invalid code' => ["FF", [65533]],
'ends early' => ["C0", [65533]],
'ends early 2' => ["E0", [65533]],
'invalid trail' => ["C0 00", [65533, 0]],
'invalid trail 2' => ["C0 C0", [65533, 65533]],
'invalid trail 3' => ["E0 00", [65533, 0]],
'invalid trail 4' => ["E0 C0", [65533, 65533]],
'invalid trail 5' => ["E0 80 00", [65533, 65533, 0]],
'invalid trail 6' => ["E0 80 C0", [65533, 65533, 65533]],
// NOTE(review): the bytes of this set are identical to those of 'overlong U+0000 - 6 bytes'
// further down, so it adds no coverage of its own; a sequence such as "F4 90 80 80" may have
// been intended for a value above U+10FFFF. Confirm before changing the vector.
'> 0x10FFFF' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'obsolete lead byte' => ["FE 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
// overlong forms: one 65533 is listed per input byte
'overlong U+0000 - 2 bytes' => ["C0 80", [65533, 65533]],
'overlong U+0000 - 3 bytes' => ["E0 80 80", [65533, 65533, 65533]],
'overlong U+0000 - 4 bytes' => ["F0 80 80 80", [65533, 65533, 65533, 65533]],
'overlong U+0000 - 5 bytes' => ["F8 80 80 80 80", [65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 6 bytes' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 2 bytes' => ["C1 BF", [65533, 65533]],
'overlong U+007F - 3 bytes' => ["E0 81 BF", [65533, 65533, 65533]],
'overlong U+007F - 4 bytes' => ["F0 80 81 BF", [65533, 65533, 65533, 65533]],
'overlong U+007F - 5 bytes' => ["F8 80 80 81 BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 6 bytes' => ["FC 80 80 80 81 BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 3 bytes' => ["E0 9F BF", [65533, 65533, 65533]],
'overlong U+07FF - 4 bytes' => ["F0 80 9F BF", [65533, 65533, 65533, 65533]],
'overlong U+07FF - 5 bytes' => ["F8 80 80 9F BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 6 bytes' => ["FC 80 80 80 9F BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 4 bytes' => ["F0 8F BF BF", [65533, 65533, 65533, 65533]],
'overlong U+FFFF - 5 bytes' => ["F8 80 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 6 bytes' => ["FC 80 80 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
// UTF-16 surrogates
// surrogates have alternate outputs for when surrogates are being allowed
'lead surrogate' => ["ED A0 80", [65533, 65533, 65533], [0xD800]],
'trail surrogate' => ["ED B0 80", [65533, 65533, 65533], [0xDC00]],
'surrogate pair' => ["ED A0 80 ED B0 80", [65533, 65533, 65533, 65533, 65533, 65533], [0xD800, 0xDC00]],
// self-sync edge cases
// continuation bytes that follow an already complete character each yield their own 65533
'trailing continuation' => ["0A 80 80", [10, 65533, 65533]],
'trailing continuation 2' => ["E5 8F A4 80", [21476, 65533]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\UTF8::encode
* @covers MensBeam\Intl\Encoding\UTF8::errEnc
*/
@ -37,6 +110,15 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Re-declares the inherited testEncodeCodePointsStatically so that the data
 * provider and coverage annotations below are attached to it for this class.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\UTF8::encode
 * @covers MensBeam\Intl\Encoding\UTF8::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
    // pure delegation: the arguments are handed to the parent implementation unchanged
    $result = parent::testEncodeCodePointsStatically($fatal, $input, $exp);
    return $result;
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::__construct
@ -142,76 +224,4 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
public function testSeekBackOverRandomData() {
    // pure delegation to the inherited test; re-declared here so that
    // class-specific annotations can be attached to it
    $result = parent::testSeekBackOverRandomData();
    return $result;
}
/**
 * Data provider of code points and the result expected from encoding them.
 *
 * Each code point is listed once and expanded into two data sets, keyed
 * "<label> (HTML)" and "<label> (fatal)". A data set has the form
 * [fatal flag, code point, expected result], where the expected result is
 * either a hexadecimal string or an EncoderException carrying the expected code.
 */
public function provideCodePoints() {
    // [label, code point, expected result when fatal is false, expected result when fatal is true]
    $table = [
        ['U+007A',   0x7A,     "7A",          "7A"],
        ['U+00A2',   0xA2,     "C2 A2",       "C2 A2"],
        ['U+6C34',   0x6C34,   "E6 B0 B4",    "E6 B0 B4"],
        ['U+1D11E',  0x1D11E,  "F0 9D 84 9E", "F0 9D 84 9E"],
        ['U+F8FF',   0xF8FF,   "EF A3 BF",    "EF A3 BF"],
        ['U+10FFFD', 0x10FFFD, "F4 8F BF BD", "F4 8F BF BD"],
        ['U+FFFE',   0xFFFE,   "EF BF BE",    "EF BF BE"],
        ['-1',       -1,       new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        ['0x110000', 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT), new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
    ];
    $sets = [];
    foreach ($table as [$label, $codePoint, $htmlResult, $fatalResult]) {
        // the non-fatal set always precedes the fatal one
        $sets[$label." (HTML)"] = [false, $codePoint, $htmlResult];
        $sets[$label." (fatal)"] = [true, $codePoint, $fatalResult];
    }
    return $sets;
}
/**
 * Data provider of byte strings paired with code points.
 *
 * Each data set has the form [bytes written as hexadecimal text, list of
 * code points]; the surrogate sets carry a third element with alternate
 * code points. 65533 (U+FFFD) is the value listed for every byte of the
 * sequences labelled as invalid.
 * NOTE(review): presumably consumed by the inherited decoder tests through
 * "@dataProvider provideStrings"; confirm against the parent test class.
 */
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]],
'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
'mixed sample' => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
// various invalid sequences
'invalid code' => ["FF", [65533]],
'ends early' => ["C0", [65533]],
'ends early 2' => ["E0", [65533]],
'invalid trail' => ["C0 00", [65533, 0]],
'invalid trail 2' => ["C0 C0", [65533, 65533]],
'invalid trail 3' => ["E0 00", [65533, 0]],
'invalid trail 4' => ["E0 C0", [65533, 65533]],
'invalid trail 5' => ["E0 80 00", [65533, 65533, 0]],
'invalid trail 6' => ["E0 80 C0", [65533, 65533, 65533]],
// NOTE(review): the bytes of this set are identical to those of 'overlong U+0000 - 6 bytes'
// further down, so it adds no coverage of its own; a sequence such as "F4 90 80 80" may have
// been intended for a value above U+10FFFF. Confirm before changing the vector.
'> 0x10FFFF' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'obsolete lead byte' => ["FE 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
// overlong forms: one 65533 is listed per input byte
'overlong U+0000 - 2 bytes' => ["C0 80", [65533, 65533]],
'overlong U+0000 - 3 bytes' => ["E0 80 80", [65533, 65533, 65533]],
'overlong U+0000 - 4 bytes' => ["F0 80 80 80", [65533, 65533, 65533, 65533]],
'overlong U+0000 - 5 bytes' => ["F8 80 80 80 80", [65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 6 bytes' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 2 bytes' => ["C1 BF", [65533, 65533]],
'overlong U+007F - 3 bytes' => ["E0 81 BF", [65533, 65533, 65533]],
'overlong U+007F - 4 bytes' => ["F0 80 81 BF", [65533, 65533, 65533, 65533]],
'overlong U+007F - 5 bytes' => ["F8 80 80 81 BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 6 bytes' => ["FC 80 80 80 81 BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 3 bytes' => ["E0 9F BF", [65533, 65533, 65533]],
'overlong U+07FF - 4 bytes' => ["F0 80 9F BF", [65533, 65533, 65533, 65533]],
'overlong U+07FF - 5 bytes' => ["F8 80 80 9F BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 6 bytes' => ["FC 80 80 80 9F BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 4 bytes' => ["F0 8F BF BF", [65533, 65533, 65533, 65533]],
'overlong U+FFFF - 5 bytes' => ["F8 80 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 6 bytes' => ["FC 80 80 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
// UTF-16 surrogates
// surrogates have alternate outputs for when surrogates are being allowed
'lead surrogate' => ["ED A0 80", [65533, 65533, 65533], [0xD800]],
'trail surrogate' => ["ED B0 80", [65533, 65533, 65533], [0xDC00]],
'surrogate pair' => ["ED A0 80 ED B0 80", [65533, 65533, 65533, 65533, 65533, 65533], [0xD800, 0xDC00]],
// self-sync edge cases
// continuation bytes that follow an already complete character each yield their own 65533
'trailing continuation' => ["0A 80 80", [10, 65533, 65533]],
'trailing continuation 2' => ["E5 8F A4 80", [21476, 65533]],
];
}
}

10
tests/cases/Encoding/TestXUserDefined.php

@ -60,6 +60,7 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\XUserDefined::encode
* @covers MensBeam\Intl\Encoding\XUserDefined::errEnc
*/
@ -67,6 +68,15 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
 * Re-declares the inherited testEncodeCodePointsStatically so that the data
 * provider and coverage annotations below are attached to it for this class.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\XUserDefined::encode
 * @covers MensBeam\Intl\Encoding\XUserDefined::errEnc
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
    // pure delegation: the arguments are handed to the parent implementation unchanged
    $result = parent::testEncodeCodePointsStatically($fatal, $input, $exp);
    return $result;
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\XUserDefined::__construct

21
tests/lib/CoderDecoderTest.php

@ -6,9 +6,30 @@
declare(strict_types=1);
namespace MensBeam\Intl\Test;
use \MensBeam\Intl\Encoding\Encoder;
abstract class CoderDecoderTest extends DecoderTest {
/**
 * Encodes the input through a general Encoder built for the tested class and
 * checks the outcome.
 *
 * $input may be a single code point or an array of them. $exp is either a
 * Throwable, in which case an exception of its class and code is expected,
 * or a hexadecimal string (spaces and letter case are ignored) that the
 * encoder's output must match.
 */
public function testEncodeCodePoints(bool $fatal, $input, $exp) {
    // the tested class's NAME constant is used as the label for the Encoder
    $encoding = $this->testedClass;
    $encoder = new Encoder($encoding::NAME, $fatal);
    // a lone code point is wrapped into a one-element array
    $codePoints = (array) $input;
    if (!($exp instanceof \Throwable)) {
        // normalize to the form produced by bin2hex(): lowercase, no spaces
        $exp = strtolower(str_replace(" ", "", $exp));
    } else {
        // expectations must be registered before the call which is meant to throw
        $this->expectException(get_class($exp));
        $this->expectExceptionCode($exp->getCode());
    }
    $this->assertSame($exp, bin2hex($encoder->encode($codePoints)));
}
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
$class = $this->testedClass;
if (!method_exists($class, "encode")) {
$this->assertTrue(true);
return;
}
if ($exp instanceof \Throwable) {
$this->expectException(get_class($exp));
$this->expectExceptionCode($exp->getCode());

Loading…
Cancel
Save