Browse Source

ISO 2022-JP encoder tests and fixes

multi-byte
J. King 4 years ago
parent
commit
d580e93e52
  1. 11
      lib/Encoding.php
  2. 8
      lib/Encoding/Encoder.php
  3. 6
      lib/Encoding/ISO2022JP.php
  4. 53
      tests/cases/Encoding/TestISO2022JP.php
  5. 25
      tools/test-iso2022jp.html
  6. 49
      tools/test.js

11
lib/Encoding.php

File diff suppressed because one or more lines are too long

8
lib/Encoding/Encoder.php

@ -135,7 +135,7 @@ class Encoder {
}
return chr($codePoint);
} elseif ($codePoint === 0xA5 || $codePoint === 0x203E) {
$ord = $codePoint = 0xA5 ? 0x5C : 0x7E;
$ord = $codePoint === 0xA5 ? 0x5C : 0x7E;
if ($this->mode !== self::MODE_ROMAN) {
return $this->modeSet(self::MODE_ROMAN, chr($ord));
}
@ -148,15 +148,15 @@ class Encoder {
}
$pointer = ISO2022JP::TABLE_POINTERS[$codePoint] ?? array_flip(ISO2022JP::TABLE_JIS0208)[$codePoint] ?? null;
if (!is_null($pointer)) {
$lead = chr($pointer / 94 - 0x21);
$trail = chr($pointer % 94 - 0x21);
$lead = chr((int) ($pointer / 94) + 0x21);
$trail = chr(($pointer % 94) + 0x21);
if ($this->mode !== self::MODE_JIS) {
return $this->modeSet(self::MODE_JIS, $lead.$trail);
}
return $lead.$trail;
}
return $this->err($codePoint);
}
}
}
}

6
lib/Encoding/ISO2022JP.php

@ -6,7 +6,7 @@
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class ISO2022JP extends AbstractEncoding implements StatefulEncoding {
class ISO2022JP extends AbstractEncoding implements Encoding {
const NAME = "ISO-2022-JP";
const LABELS = [
"csiso2022jp",
@ -143,10 +143,6 @@ class ISO2022JP extends AbstractEncoding implements StatefulEncoding {
return $mode;
}
/**
 * Always returns an empty string; neither $codePoints nor $fatal is consulted.
 *
 * @param array $codePoints Code points to encode (unused by this body)
 * @param bool $fatal Error-mode flag (unused by this body)
 * @return string Always ""
 */
public static function encode(array $codePoints, bool $fatal = true): string {
return "";
}
protected function seekBack(int $distance): int {
if ($this->dirtyEOF && $this->posByte === $this->lenByte) {
list($this->modeMark, $this->mode) = array_pop($this->modeStack);

53
tests/cases/Encoding/TestISO2022JP.php

@ -7,6 +7,8 @@ declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\ISO2022JP;
use MensBeam\Intl\Encoding\Encoding;
use MensBeam\Intl\Encoding\EncoderException;
class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = ISO2022JP::class;
@ -32,6 +34,52 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
 * Data provider for the code point encoding tests.
 *
 * Every named data set is a triple:
 *  - the fatal flag (false for the "(HTML)" variants, true for the "(fatal)" variants);
 *  - the list of code points to encode;
 *  - the expectation: either the output bytes written as space-separated
 *    uppercase hex pairs, or an EncoderException instance.
 *
 * @return array
 */
public function provideCodePoints() {
return [
// Single code points whose expected output is the same single byte
'U+0020 (HTML)' => [false, [0x20], "20"],
'U+0020 (fatal)' => [true, [0x20], "20"],
'U+005C (HTML)' => [false, [0x5C], "5C"],
'U+005C (fatal)' => [true, [0x5C], "5C"],
'U+007E (HTML)' => [false, [0x7E], "7E"],
'U+007E (fatal)' => [true, [0x7E], "7E"],
// Expected output opens with 1B 28 4A and closes with 1B 28 42
'U+00A5 (HTML)' => [false, [0xA5], "1B 28 4A 5C 1B 28 42"],
'U+00A5 (fatal)' => [true, [0xA5], "1B 28 4A 5C 1B 28 42"],
'U+203E (HTML)' => [false, [0x203E], "1B 28 4A 7E 1B 28 42"],
'U+203E (fatal)' => [true, [0x203E], "1B 28 4A 7E 1B 28 42"],
// Expected output opens with 1B 24 42, carries a two-byte sequence, and closes with 1B 28 42
'U+FF61 (HTML)' => [false, [0xFF61], "1B 24 42 21 23 1B 28 42"],
'U+FF61 (fatal)' => [true, [0xFF61], "1B 24 42 21 23 1B 28 42"],
'U+FF9F (HTML)' => [false, [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
'U+FF9F (fatal)' => [true, [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
'U+2212 (HTML)' => [false, [0x2212], "1B 24 42 21 5D 1B 28 42"],
'U+2212 (fatal)' => [true, [0x2212], "1B 24 42 21 5D 1B 28 42"],
'U+2116 (HTML)' => [false, [0x2116], "1B 24 42 2D 62 1B 28 42"],
'U+2116 (fatal)' => [true, [0x2116], "1B 24 42 2D 62 1B 28 42"],
'U+FFE2 (HTML)' => [false, [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
'U+FFE2 (fatal)' => [true, [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
// Code points with no encoding: the HTML variant expects the ASCII bytes of "&#NNN;"
// (26 23 ... 3B, NNN being the decimal code point), the fatal variant expects an exception
'U+00C6 (HTML)' => [false, [0xC6], "26 23 31 39 38 3B"],
'U+00C6 (fatal)' => [true, [0xC6], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'U+FFFD (HTML)' => [false, [0xFFFD], "26 23 36 35 35 33 33 3B"],
'U+FFFD (fatal)' => [true, [0xFFFD], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
// Multi-code-point sequences: the escape sequence is expected only where the mode changes
'Roman (HTML)' => [false, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
'Roman (fatal)' => [true, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
'Roman to ASCII (HTML)' => [false, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
'Roman to ASCII (fatal)' => [true, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
'Roman to error (HTML)' => [false, [0xA5, 0x80], "1B 28 4A 5C 26 23 31 32 38 3B 1B 28 42"],
'Roman to error (fatal)' => [true, [0xA5, 0x80], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
'JIS (HTML)' => [false, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
'JIS (fatal)' => [true, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
'JIS to Roman (HTML)' => [false, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
'JIS to Roman (fatal)' => [true, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
'JIS to ASCII 1 (HTML)' => [false, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
'JIS to ASCII 1 (fatal)' => [true, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
'JIS to ASCII 2 (HTML)' => [false, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
'JIS to ASCII 2 (fatal)' => [true, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
'JIS to error (HTML)' => [false, [0x2116, 0x80], "1B 24 42 2D 62 1B 28 42 26 23 31 32 38 3B"],
'JIS to error (fatal)' => [true, [0x2116, 0x80], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
// U+001B, U+000E and U+000F: the HTML variant expects "&#65533;" three times over
'Escape characters (HTML)' => [false, [0x1B, 0xE, 0xF], "26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B"],
'Escape characters (fatal)' => [true, [0x1B, 0xE, 0xF], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)],
// Values below 0 or above 0x10FFFF: an invalid-code-point exception is expected whatever the fatal flag
'-1 (HTML)' => [false, [-1], new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, [-1], new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, [0x110000], new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, [0x110000], new EncoderException("", Encoding::E_INVALID_CODE_POINT)],
];
}
@ -61,8 +109,6 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\ISO2022JP::encode
* @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc
*/
public function testEncodeCodePoints(bool $fatal, $input, $exp) {
return parent::testEncodeCodePoints($fatal, $input, $exp);
@ -70,8 +116,7 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\ISO2022JP::encode
* @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc
* @coversNothing
*/
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);

25
tools/test-iso2022jp.html

@ -17,8 +17,29 @@ var sampleStrings = {
'Invalid bytes': "80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF",
};
var sampleCharacters = {
'U+0020': [0x20],
'U+005C': [0x5C],
'U+007E': [0x7E],
'U+00A5': [0xA5],
'U+203E': [0x203E],
'U+FF61': [0xFF61],
'U+FF9F': [0xFF9F],
'U+2212': [0x2212],
'U+2116': [0x2116],
'U+FFE2': [0xFFE2],
'U+00C6': [0xC6],
'U+FFFD': [0xFFFD],
'Roman': [0xA5, 0x20, 0x203E],
'Roman to ASCII': [0xA5, 0x5C],
'Roman to error': [0xA5, 0x80],
'JIS': [0x2116, 0xFFE2, 0x2212],
'JIS to Roman': [0x2116, 0xA5],
'JIS to ASCII 1': [0x2116, 0x20],
'JIS to ASCII 2': [0x2116, 0x5C],
'JIS to error': [0x2116, 0x80],
'Escape characters': [0x1B, 0xE, 0xF], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158
'-1': [-1],
'0x110000': [0x110000],
};
var seekCodePoints = [
];
</script>
<script src="test.js"></script>

49
tools/test.js

@ -10,9 +10,9 @@ function encodeCodePoint(code, fatal) {
return 'new EncoderException("", Encoding::E_INVALID_CODE_POINT)';
} else {
var l = document.createElement("a");
l.href = "http://example.com/?" + String.fromCodePoint(code) + "a";
l.href = "http://example.com/?" + String.fromCodePoint(code) + "#";
var bytes = [];
let url = l.search.substr(1, l.search.length - 2);
let url = l.search.substr(1);
for (let a = 0; a < url.length; a++) {
if ((url.charAt(a) == "%" && url.substr(a, 6) == "%26%23") || url.charAt(a) == "&") {
// character cannot be encoded
@ -32,10 +32,33 @@ function encodeCodePoint(code, fatal) {
return bytes;
}
/**
 * Encodes a sequence of code points by placing them in the query string of
 * an anchor element's URL and reading the serialized query back.
 *
 * NOTE(review): presumably the hosting page's character encoding decides
 * which bytes the browser produces -- confirm against the HTML harness.
 *
 * @param {number[]} codes - The code points to encode, in order
 * @param {boolean} fatal - Whether an unencodable character should be reported as an exception
 * @returns {string[]|string} The output as two-digit uppercase hex strings
 *     (one per UTF-16 unit of the decoded query), or the PHP source text of
 *     the exception expected for this input
 */
function encodeCodePoints(codes, fatal) {
    // Any value outside 0..0x10FFFF short-circuits everything else, whatever the mode
    const hasInvalid = codes.some(function (codePoint) {
        return codePoint < 0 || codePoint > 0x10FFFF;
    });
    if (hasInvalid) {
        return 'new EncoderException("", Encoding::E_INVALID_CODE_POINT)';
    }

    // The trailing "#" starts the fragment, so the query ends exactly where the input does
    const anchor = document.createElement("a");
    anchor.href = "http://example.com/?" + String.fromCodePoint(...codes) + "#";

    // Drop the leading "?" and undo the percent-encoding
    const query = decodeURIComponent(anchor.search.substr(1));

    // A numeric character reference in the decoded query is taken to mean "could not be encoded"
    if (fatal && query.includes("&#")) {
        return 'new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)';
    }

    // split("") walks UTF-16 code units, the same units charCodeAt() indexes
    return query.split("").map(function (unit) {
        return unit.charCodeAt(0).toString(16).padStart(2, "0").toUpperCase();
    });
}
function wrapCodePoint(code, fatal) {
var out = encodeCodePoint(code, fatal);
if (typeof code === "number") {
var out = encodeCodePoint(code, fatal);
} else {
var out = encodeCodePoints(code, fatal);
}
if (Array.isArray(out)) {
return '"' + out.join(" ") + '"';
return ('"' + out.join(" ") + '"').toUpperCase();
} else if (out.charAt(0) == "&") {
return 'bin2hex("' + out + '")';
} else {
@ -72,9 +95,21 @@ if(typeof sampleStrings != 'undefined') {
if(typeof sampleCharacters != 'undefined') {
for (name in sampleCharacters) {
let code = sampleCharacters[name];
if (code > -1 && code % 1 == 0) code = "0x" + code.toString(16).toUpperCase();
let line1 = "'" + name + " (HTML)' => [false, " + code + ", " + wrapCodePoint(code, false) + "],\n";
let line2 = "'" + name + " (fatal)' => [true, " + code + ", " + wrapCodePoint(code, true) + "],\n";
if (typeof code == "number" && code > -1 && code % 1 == 0) {
var displayCode = "0x" + code.toString(16).toUpperCase();
} else if (typeof code !== "number") {
var displayCode = [...code];
for (let a = 0; a < displayCode.length; a++) {
if (displayCode[a] > -1 && displayCode[a] % 1 == 0) {
displayCode[a] = "0x" + displayCode[a].toString(16).toUpperCase();
}
}
displayCode = "[" + displayCode.join(", ") + "]";
} else {
var displayCode = code;
}
let line1 = "'" + name + " (HTML)' => [false, " + displayCode + ", " + wrapCodePoint(code, false) + "],\n";
let line2 = "'" + name + " (fatal)' => [true, " + displayCode + ", " + wrapCodePoint(code, true) + "],\n";
out.appendChild(document.createTextNode(line1));
out.appendChild(document.createTextNode(line2));
}

Loading…
Cancel
Save