diff --git a/README.md b/README.md
index 52767c2..6cc7199 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ Included here is a partial suite of WHATWG-compatible seekable string decoders w
* gb18030
* GBK
* Big5
+* EUC-KR
* all single-byte encodings
* x-user-defined
diff --git a/lib/Encoding/EUCKR.php b/lib/Encoding/EUCKR.php
index 7c0d68f..54ce73f 100644
--- a/lib/Encoding/EUCKR.php
+++ b/lib/Encoding/EUCKR.php
@@ -107,7 +107,7 @@ class EUCKR implements StatelessEncoding {
$distance--;
$this->posChar--;
}
- while ($distance > 0 && ($this->posByte > 0 || $this->bufferedCode > 0)) {
+ while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;
// go back one byte
diff --git a/tests/cases/Encoding/TestBig5.php b/tests/cases/Encoding/TestBig5.php
index defb69e..55f2933 100644
--- a/tests/cases/Encoding/TestBig5.php
+++ b/tests/cases/Encoding/TestBig5.php
@@ -148,9 +148,9 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
'two-byte character' => ["D7 D7", [36290]],
'EOF after first byte' => ["D7", [65533]],
'low byte after first byte' => ["D7 39", [65533, 57]],
- '0x80 as first byte' => ["80 D7", [65533, 65533]],
- '0xFF as first byte' => ["FF D7", [65533, 65533]],
- 'invalid high byte as first byte' => ["81 D7", [65533]],
+ '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
+ '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
+ 'invalid high byte as first byte' => ["81 D7 00", [65533, 0]],
'0x7F after first byte' => ["D7 7F", [65533, 127]],
'0xFF after first byte' => ["D7 FF", [65533]],
'invalid high byte after first byte' => ["D7 81", [65533]],
diff --git a/tests/cases/Encoding/TestEUCKR.php b/tests/cases/Encoding/TestEUCKR.php
index 4363b47..3d67109 100644
--- a/tests/cases/Encoding/TestEUCKR.php
+++ b/tests/cases/Encoding/TestEUCKR.php
@@ -12,11 +12,21 @@ use MensBeam\Intl\Encoding\EncoderException;
class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = EUCKR::class;
- protected $seekString = "";
- protected $seekCodes = [];
- protected $seekOffsets = [];
+ /*
+ Char 0 U+007A (1 byte) Offset 0
+ Char 1 U+ACF2 (2 bytes) Offset 1
+ Char 2 U+0020 (1 byte) Offset 3
+ Char 3 U+6C34 (2 bytes) Offset 4
+ Char 4 U+0391 (2 bytes) Offset 6
+ Char 5 U+03C9 (2 bytes) Offset 8
+ Char 6 U+002A (1 byte) Offset 10
+ End of string at char 7, offset 11
+ */
+ protected $seekString = "7A 81E9 20 E2A9 A5C1 A5F8 2A";
+ protected $seekCodes = [0x7A, 0xACF2, 0x20, 0x6C34, 0x391, 0x3C9, 0x2A];
+ protected $seekOffsets = [0, 1, 3, 4, 6, 8, 10, 11];
/* This string contains an invalid character sequence sandwiched between two null characters */
- protected $brokenChar = "";
+ protected $brokenChar = "00 FF 00";
/**
* @dataProvider provideCodePoints
@@ -118,11 +128,33 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
public function provideCodePoints() {
return [
+ 'U+0064 (HTML)' => [false, 0x64, "64"],
+ 'U+0064 (fatal)' => [true, 0x64, "64"],
+ 'U+00CA (HTML)' => [false, 0xCA, bin2hex("&#202;")],
+ 'U+00CA (fatal)' => [true, 0xCA, new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
+ 'U+ACF2 (HTML)' => [false, 0xACF2, "81 E9"],
+ 'U+ACF2 (fatal)' => [true, 0xACF2, "81 E9"],
+ '-1 (HTML)' => [false, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
+ '-1 (fatal)' => [true, -1, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
+ '0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
+ '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
public function provideStrings() {
return [
+ 'empty string' => ["", []],
+ 'sanity check' => ["40", [64]],
+ 'two-byte character' => ["D7 D7", [21033]],
+ 'EOF after first byte' => ["D7", [65533]],
+ 'low byte after first byte' => ["D7 39", [65533, 57]],
+ '0x80 as first byte' => ["80 D7 00", [65533, 65533, 0]],
+ '0xFF as first byte' => ["FF D7 00", [65533, 65533, 0]],
+ '0x7F after first byte' => ["D7 7F", [65533, 127]],
+ '0xFF after first byte' => ["D7 FF", [65533]],
+ 'non-character' => ["A5 DC", [65533]],
+ 'mixed string' => ["7A D7 AA A4 F4 88 62 88 A5", [122, 30267, 12676, 45714, 45802]],
+ 'mixed string 2' => ["62 D7 D7 D7 D7 62", [98, 21033, 21033, 98]],
];
}
diff --git a/tools/test-big5.html b/tools/test-big5.html
index 3aa7ecc..f6ce6c5 100644
--- a/tools/test-big5.html
+++ b/tools/test-big5.html
@@ -9,9 +9,9 @@ var sampleStrings = {
// invalid sequences
'EOF after first byte': "D7",
'low byte after first byte': "D7 39",
- '0x80 as first byte': "80 D7",
- '0xFF as first byte': "FF D7",
- 'invalid high byte as first byte': "81 D7",
+ '0x80 as first byte': "80 D7 00",
+ '0xFF as first byte': "FF D7 00",
+ 'invalid high byte as first byte': "81 D7 00",
'0x7F after first byte': "D7 7F",
'0xFF after first byte': "D7 FF",
'invalid high byte after first byte': "D7 81",
diff --git a/tools/test-euckr.html b/tools/test-euckr.html
new file mode 100644
index 0000000..30ba9be
--- /dev/null
+++ b/tools/test-euckr.html
@@ -0,0 +1,38 @@
+
+
+
+