diff --git a/lib/Encoding/ShiftJIS.php b/lib/Encoding/ShiftJIS.php
index c4eb274..998add0 100644
--- a/lib/Encoding/ShiftJIS.php
+++ b/lib/Encoding/ShiftJIS.php
@@ -120,19 +120,19 @@ class ShiftJIS extends AbstractEncoding implements StatelessEncoding {
}
// go back one byte
$b1 = ord(@$this->string[--$this->posByte]);
- if ($b1 < 0x40 || $b1 > 0xFC || $b1 == 0x7F || $this->posByte === 0 || $this->posByte === $this->errMark) { // these bytes never appear in sequences, and the first byte is necessarily the start of a sequence
+ if ($b1 < 0x40 || $b1 > 0xFC || $b1 === 0x7F || $this->posByte === 0 || $this->posByte === $this->errMark) { // these bytes never appear in sequences, and the first byte is necessarily the start of a sequence
// the byte is a character
continue;
}
// go back a second byte
$b2 = ord(@$this->string[--$this->posByte]);
- if ($this->posByte === $this->errMark || $this->posByte === 0) { // position is unambiguously the start of a character
- // the two bytes form a character
- continue;
- } elseif ($b2 < 0x81 || $b2 > 0xFC || ($b2 >= 0xA0 && $b2 <= 0xDF)) { // these bytes never appear in the lead of a sequence
+ if ($b2 < 0x81 || $b2 > 0xFC || ($b2 >= 0xA0 && $b2 <= 0xDF)) { // these bytes never appear in the lead of a sequence
// the first byte was a character
$this->posByte += 1;
continue;
+ } elseif ($this->posByte === $this->errMark || $this->posByte === 0) { // position is unambiguously the start of a character
+ // the two bytes form a character
+ continue;
} else { // the second byte is part of a two-byte sequence, but it's unclear if it's the lead or trail byte
$start = $this->posByte + 2;
$pos = $this->posByte;
diff --git a/tests/cases/Encoding/TestGB18030.php b/tests/cases/Encoding/TestGB18030.php
index ddf30a6..6cddf7c 100644
--- a/tests/cases/Encoding/TestGB18030.php
+++ b/tests/cases/Encoding/TestGB18030.php
@@ -144,6 +144,14 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
}
+
+ /**
+ * @covers MensBeam\Intl\Encoding\GB18030::seekBack
+ */
+ public function testSeekBackOverRandomData() {
+ return parent::testSeekBackOverRandomData();
+ }
+
public function provideCodePoints() {
// bytes confirmed using Firefox
$series_gb18030 = [
@@ -200,14 +208,6 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
}
}
-
- /**
- * @covers MensBeam\Intl\Encoding\GB18030::seekBack
- */
- public function testSeekBackOverRandomData() {
- return parent::testSeekBackOverRandomData();
- }
-
public function provideStrings() {
return [
'empty string' => ["", []],
diff --git a/tests/cases/Encoding/TestShiftJIS.php b/tests/cases/Encoding/TestShiftJIS.php
index abb154c..1007e8d 100644
--- a/tests/cases/Encoding/TestShiftJIS.php
+++ b/tests/cases/Encoding/TestShiftJIS.php
@@ -12,16 +12,6 @@ use MensBeam\Intl\Encoding\EncoderException;
class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = ShiftJIS::class;
- /*
- Char 0 U+007A (1 byte) Offset 0
- Char 1 U+86CC (2 bytes) Offset 1
- Char 2 U+6C34 (2 bytes) Offset 3
- Char 3 U+00CA (0 bytes) Offset 5
- Char 4 U+0304 (2 bytes) Offset 5
- Char 5 U+00EA (0 bytes) Offset 7
- Char 6 U+030C (2 bytes) Offset 7
- End of string at char 7, offset 9
- */
protected $seekString = "";
protected $seekCodes = [];
protected $seekOffsets = [];
@@ -151,6 +141,12 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
public function provideStrings() {
return [
+ 'empty string' => ["", []],
+ 'sanity check' => ["40", [64]],
+ 'former ASCII deviations' => ["5C 7E", [92, 126]],
+ 'JIS X 0201 range' => ["A1 DF", [65377, 65439]],
+ 'EUDC range' => ["F040 F9FC", [57344, 59223]],
+ 'JIS X 0208 assigned range' => ["8140 9F7E 8180 9FFC", [12288, 27631, 247, 28364]],
];
}
diff --git a/tests/lib/DecoderTest.php b/tests/lib/DecoderTest.php
index 5f74450..fcf069e 100644
--- a/tests/lib/DecoderTest.php
+++ b/tests/lib/DecoderTest.php
@@ -54,7 +54,7 @@ abstract class DecoderTest extends \PHPUnit\Framework\TestCase {
}
$this->assertSame(sizeof($exp), $pos);
while ($s->posChar()) {
- $this->assertSame(0, $s->seek(-1));
+ $this->assertSame(0, $s->seek(-1), "Error stepping back to position ".($pos - 1));
$this->assertSame(--$pos, $s->posChar());
$act[] = $s->nextCode();
$s->seek(-1);
diff --git a/tools/test-shiftjis.html b/tools/test-shiftjis.html
new file mode 100644
index 0000000..69b31d5
--- /dev/null
+++ b/tools/test-shiftjis.html
@@ -0,0 +1,20 @@
+
+
+
+
+