Browse Source

Full tests for EUC-KR

span
J. King 6 years ago
parent
commit
2810ed9b2a
  1. 1
      README.md
  2. 2
      lib/Encoding/EUCKR.php
  3. 6
      tests/cases/Encoding/TestBig5.php
  4. 40
      tests/cases/Encoding/TestEUCKR.php
  5. 6
      tools/test-big5.html
  6. 38
      tools/test-euckr.html

1
README.md

@@ -9,6 +9,7 @@ Included here is a partial suite of WHATWG-compatible seekable string decoders w
* gb18030
* GBK
* Big5
* EUC-KR
* all single-byte encodings
* x-user-defined

2
lib/Encoding/EUCKR.php

@@ -107,7 +107,7 @@ class EUCKR implements StatelessEncoding {
$distance--;
$this->posChar--;
}
- while ($distance > 0 && ($this->posByte > 0 || $this->bufferedCode > 0)) {
+ while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;
// go back one byte

6
tests/cases/Encoding/TestBig5.php

@@ -148,9 +148,9 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
'two-byte character' => ["D7 D7", [36290]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
- '0x80 as first byte' => ["80 D7", [65533, 65533]],
- '0xFF as first byte' => ["FF D7", [65533, 65533]],
- 'invalid high byte as first byte' => ["81 D7", [65533]],
+ '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
+ '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
+ 'invalid high byte as first byte' => ["81 D7 00", [65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'invalid high byte after first byte' => ["D7 81", [65533]],

40
tests/cases/Encoding/TestEUCKR.php

@@ -12,11 +12,21 @@ use MensBeam\Intl\Encoding\EncoderException;
class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = EUCKR::class;
- protected $seekString = "";
- protected $seekCodes = [];
- protected $seekOffsets = [];
+ /*
+ Char 0 U+007A (1 byte) Offset 0
+ Char 1 U+ACF2 (2 bytes) Offset 1
+ Char 2 U+0020 (1 byte) Offset 3
+ Char 3 U+6C34 (2 bytes) Offset 4
+ Char 4 U+0391 (2 bytes) Offset 6
+ Char 5 U+03C9 (2 bytes) Offset 8
+ Char 6 U+002A (1 byte) Offset 10
+ End of string at char 7, offset 11
+ */
+ protected $seekString = "7A 81E9 20 E2A9 A5C1 A5F8 2A";
+ protected $seekCodes = [0x7A, 0xACF2, 0x20, 0x6C34, 0x391, 0x3C9, 0x2A];
+ protected $seekOffsets = [0, 1, 3, 4, 6, 8, 10, 11];
/* This string contains an invalid character sequence sandwiched between two null characters */
- protected $brokenChar = "";
+ protected $brokenChar = "00 FF 00";
/**
* @dataProvider provideCodePoints
@@ -118,11 +128,33 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
 * Data provider of encoder fixtures.
 *
 * Each set holds three values: a boolean, a code point, and the expected
 * outcome, which is either a string of hexadecimal byte values or the
 * EncoderException that should be thrown.
 * NOTE(review): judging by the set labels the boolean selects fatal (true)
 * versus HTML (false) error handling; confirm against the consuming test.
 *
 * @return array<string, array{0: bool, 1: int, 2: string|EncoderException}>
 */
public function provideCodePoints() {
    return [
        'U+0064 (HTML)'    => [false, 0x64, "64"],
        'U+0064 (fatal)'   => [true, 0x64, "64"],
        // U+00CA cannot be encoded (the fatal set below expects E_UNAVAILABLE_CODE_POINT),
        // so HTML mode must produce the decimal character reference "&#202;" (0xCA = 202),
        // not the literal character itself
        'U+00CA (HTML)'    => [false, 0xCA, bin2hex("&#202;")],
        'U+00CA (fatal)'   => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
        'U+ACF2 (HTML)'    => [false, 0xACF2, "81 E9"],
        'U+ACF2 (fatal)'   => [true, 0xACF2, "81 E9"],
        // values outside 0..0x10FFFF are not code points at all and fail in both modes
        '-1 (HTML)'        => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        '-1 (fatal)'       => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        '0x110000 (HTML)'  => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
        '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
    ];
}
/**
 * Data provider of decoder fixtures.
 *
 * Each set pairs a string of space-separated hexadecimal byte values with
 * the list of code points expected for it.
 * NOTE(review): presumably the second element is what the decoder should
 * yield for the first; confirm against the consuming test.
 *
 * @return array<string, array{0: string, 1: int[]}>
 */
public function provideStrings() {
    // U+FFFD REPLACEMENT CHARACTER (65533), expected wherever the input is invalid
    $bad = 0xFFFD;

    $sets = [];
    $sets['empty string'] = ["", []];
    $sets['sanity check'] = ["40", [64]];
    $sets['two-byte character'] = ["D7 D7", [21033]];
    $sets['EOF after first byte'] = ["D7", [$bad]];
    $sets['low byte after first byte'] = ["D7 39", [$bad, 57]];
    $sets['0x80 as first byte'] = ["80 D7 00", [$bad, $bad, 0]];
    $sets['0xFF as first byte'] = ["FF D7 00", [$bad, $bad, 0]];
    $sets['0x7F after first byte'] = ["D7 7F", [$bad, 127]];
    $sets['0xFF after first byte'] = ["D7 FF", [$bad]];
    $sets['non-character'] = ["A5 DC", [$bad]];
    $sets['mixed string'] = ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]];
    $sets['mixed string 2'] = ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]];

    return $sets;
}

6
tools/test-big5.html

@@ -9,9 +9,9 @@ var sampleStrings = {
// invalid sequences
'EOF after first byte': "D7",
'low byte after first byte': "D7 39",
- '0x80 as first byte': "80 D7",
- '0xFF as first byte': "FF D7",
- 'invalid high byte as first byte': "81 D7",
+ '0x80 as first byte': "80 D7 00",
+ '0xFF as first byte': "FF D7 00",
+ 'invalid high byte as first byte': "81 D7 00",
'0x7F after first byte': "D7 7F",
'0xFF after first byte': "D7 FF",
'invalid high byte after first byte': "D7 81",

38
tools/test-euckr.html

@@ -0,0 +1,38 @@
<!DOCTYPE html>
<meta charset=euc-kr>
<script>
// Sample byte sequences keyed by a descriptive label. Each value is a string
// of space-separated hexadecimal byte values (empty for the empty string).
// The labels and byte strings match the sets in provideStrings() of
// tests/cases/Encoding/TestEUCKR.php.
// NOTE(review): presumably read by test.js, which is loaded after this script; confirm there.
var sampleStrings = {
'empty string': "",
// valid single characters
'sanity check': "40",
'two-byte character': "D7 D7",
// invalid sequences
'EOF after first byte': "D7",
'low byte after first byte': "D7 39",
'0x80 as first byte': "80 D7 00",
'0xFF as first byte': "FF D7 00",
'0x7F after first byte': "D7 7F",
'0xFF after first byte': "D7 FF",
'non-character': "A5 DC",
// mixed string
'mixed string': "7A D7 AA A4 F4 88 62 88 A5",
'mixed string 2': "62 D7 D7 D7 D7 62",
};
// Sample code points keyed by label. The last two (-1 and 0x110000) lie
// outside the Unicode range 0..0x10FFFF; the PHP test expects
// E_INVALID_CODE_POINT for both.
var sampleCharacters = {
'U+0064': 0x64,
'U+00CA': 0x00CA,
'U+ACF2': 0xACF2,
'-1': -1,
'0x110000': 0x110000,
};
// Code points of the seek test string, in order; the same values as
// $seekCodes in tests/cases/Encoding/TestEUCKR.php.
var seekCodePoints = [
0x007A,
0xACF2,
0x0020,
0x6C34,
0x0391,
0x03C9,
0x002A,
];
</script>
<script src="test.js"></script>
Loading…
Cancel
Save