From d580e93e52a44a36a7c25b01894a83521bd7caa3 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 16 Oct 2020 20:13:53 -0400 Subject: [PATCH] ISO 2022-JP encoder tests and fixes --- lib/Encoding.php | 11 ++++++ lib/Encoding/Encoder.php | 8 ++-- lib/Encoding/ISO2022JP.php | 6 +-- tests/cases/Encoding/TestISO2022JP.php | 53 ++++++++++++++++++++++++-- tools/test-iso2022jp.html | 25 +++++++++++- tools/test.js | 49 ++++++++++++++++++++---- 6 files changed, 130 insertions(+), 22 deletions(-) diff --git a/lib/Encoding.php b/lib/Encoding.php index 1a8c8c7..f5610d4 100644 --- a/lib/Encoding.php +++ b/lib/Encoding.php @@ -6,6 +6,9 @@ declare(strict_types=1); namespace MensBeam\Intl; +use MensBeam\Intl\Encoding\Encoder; +use MensBeam\Intl\Encoding\EncoderException; + abstract class Encoding { const LABEL_MAP = ['big5'=>"Big5",'big5-hkscs'=>"Big5",'cn-big5'=>"Big5",'csbig5'=>"Big5",'x-x-big5'=>"Big5",'cseucpkdfmtjapanese'=>"EUC-JP",'euc-jp'=>"EUC-JP",'x-euc-jp'=>"EUC-JP",'cseuckr'=>"EUC-KR",'csksc56011987'=>"EUC-KR",'euc-kr'=>"EUC-KR",'iso-ir-149'=>"EUC-KR",'korean'=>"EUC-KR",'ks_c_5601-1987'=>"EUC-KR",'ks_c_5601-1989'=>"EUC-KR",'ksc5601'=>"EUC-KR",'ksc_5601'=>"EUC-KR",'windows-949'=>"EUC-KR",'gb18030'=>"gb18030",'chinese'=>"GBK",'csgb2312'=>"GBK",'csiso58gb231280'=>"GBK",'gb2312'=>"GBK",'gb_2312'=>"GBK",'gb_2312-80'=>"GBK",'gbk'=>"GBK",'iso-ir-58'=>"GBK",'x-gbk'=>"GBK",'866'=>"IBM866",'cp866'=>"IBM866",'csibm866'=>"IBM866",'ibm866'=>"IBM866",'csiso2022jp'=>"ISO-2022-JP",'iso-2022-jp'=>"ISO-2022-JP",'csisolatin6'=>"ISO-8859-10",'iso-8859-10'=>"ISO-8859-10",'iso-ir-157'=>"ISO-8859-10",'iso8859-10'=>"ISO-8859-10",'iso885910'=>"ISO-8859-10",'l6'=>"ISO-8859-10",'latin6'=>"ISO-8859-10",'iso-8859-13'=>"ISO-8859-13",'iso8859-13'=>"ISO-8859-13",'iso885913'=>"ISO-8859-13",'iso-8859-14'=>"ISO-8859-14",'iso8859-14'=>"ISO-8859-14",'iso885914'=>"ISO-8859-14",'csisolatin9'=>"ISO-8859-15",'iso-8859-15'=>"ISO-8859-15",'iso8859-15'=>"ISO-8859-15",'iso885915'=>"ISO-8859-15",'iso_8859-15'=>"ISO-8859-15",'l9'=>"ISO-8859-15",'iso-8859-16'=>"ISO-8859-16",'csisolatin2'=>"ISO-8859-2",'iso-8859-2'=>"ISO-8859-2",'iso-ir-101'=>"ISO-8859-2",'iso8859-2'=>"ISO-8859-2",'iso88592'=>"ISO-8859-2",'iso_8859-2'=>"ISO-8859-2",'iso_8859-2:1987'=>"ISO-8859-2",'l2'=>"ISO-8859-2",'latin2'=>"ISO-8859-2",'csisolatin3'=>"ISO-8859-3",'iso-8859-3'=>"ISO-8859-3",'iso-ir-109'=>"ISO-8859-3",'iso8859-3'=>"ISO-8859-3",'iso88593'=>"ISO-8859-3",'iso_8859-3'=>"ISO-8859-3",'iso_8859-3:1988'=>"ISO-8859-3",'l3'=>"ISO-8859-3",'latin3'=>"ISO-8859-3",'csisolatin4'=>"ISO-8859-4",'iso-8859-4'=>"ISO-8859-4",'iso-ir-110'=>"ISO-8859-4",'iso8859-4'=>"ISO-8859-4",'iso88594'=>"ISO-8859-4",'iso_8859-4'=>"ISO-8859-4",'iso_8859-4:1988'=>"ISO-8859-4",'l4'=>"ISO-8859-4",'latin4'=>"ISO-8859-4",'csisolatincyrillic'=>"ISO-8859-5",'cyrillic'=>"ISO-8859-5",'iso-8859-5'=>"ISO-8859-5",'iso-ir-144'=>"ISO-8859-5",'iso8859-5'=>"ISO-8859-5",'iso88595'=>"ISO-8859-5",'iso_8859-5'=>"ISO-8859-5",'iso_8859-5:1988'=>"ISO-8859-5",'arabic'=>"ISO-8859-6",'asmo-708'=>"ISO-8859-6",'csiso88596e'=>"ISO-8859-6",'csiso88596i'=>"ISO-8859-6",'csisolatinarabic'=>"ISO-8859-6",'ecma-114'=>"ISO-8859-6",'iso-8859-6'=>"ISO-8859-6",'iso-8859-6-e'=>"ISO-8859-6",'iso-8859-6-i'=>"ISO-8859-6",'iso-ir-127'=>"ISO-8859-6",'iso8859-6'=>"ISO-8859-6",'iso88596'=>"ISO-8859-6",'iso_8859-6'=>"ISO-8859-6",'iso_8859-6:1987'=>"ISO-8859-6",'csisolatingreek'=>"ISO-8859-7",'ecma-118'=>"ISO-8859-7",'elot_928'=>"ISO-8859-7",'greek'=>"ISO-8859-7",'greek8'=>"ISO-8859-7",'iso-8859-7'=>"ISO-8859-7",'iso-ir-126'=>"ISO-8859-7",'iso8859-7'=>"ISO-8859-7",'iso88597'=>"ISO-8859-7",'iso_8859-7'=>"ISO-8859-7",'iso_8859-7:1987'=>"ISO-8859-7",'sun_eu_greek'=>"ISO-8859-7",'csiso88598e'=>"ISO-8859-8",'csisolatinhebrew'=>"ISO-8859-8",'hebrew'=>"ISO-8859-8",'iso-8859-8'=>"ISO-8859-8",'iso-8859-8-e'=>"ISO-8859-8",'iso-ir-138'=>"ISO-8859-8",'iso8859-8'=>"ISO-8859-8",'iso88598'=>"ISO-8859-8",'iso_8859-8'=>"ISO-8859-8",'iso_8859-8:1988'=>"ISO-8859-8",'visual'=>"ISO-8859-8",'csiso88598i'=>"ISO-8859-8-I",'iso-8859-8-i'=>"ISO-8859-8-I",'logical'=>"ISO-8859-8-I",'cskoi8r'=>"KOI8-R",'koi'=>"KOI8-R",'koi8'=>"KOI8-R",'koi8-r'=>"KOI8-R",'koi8_r'=>"KOI8-R",'koi8-ru'=>"KOI8-U",'koi8-u'=>"KOI8-U",'csmacintosh'=>"macintosh",'mac'=>"macintosh",'macintosh'=>"macintosh",'x-mac-roman'=>"macintosh",'csiso2022kr'=>"replacement",'hz-gb-2312'=>"replacement",'iso-2022-cn'=>"replacement",'iso-2022-cn-ext'=>"replacement",'iso-2022-kr'=>"replacement",'replacement'=>"replacement",'csshiftjis'=>"Shift_JIS",'ms932'=>"Shift_JIS",'ms_kanji'=>"Shift_JIS",'shift-jis'=>"Shift_JIS",'shift_jis'=>"Shift_JIS",'sjis'=>"Shift_JIS",'windows-31j'=>"Shift_JIS",'x-sjis'=>"Shift_JIS",'unicodefffe'=>"UTF-16BE",'utf-16be'=>"UTF-16BE",'csunicode'=>"UTF-16LE",'iso-10646-ucs-2'=>"UTF-16LE",'ucs-2'=>"UTF-16LE",'unicode'=>"UTF-16LE",'unicodefeff'=>"UTF-16LE",'utf-16'=>"UTF-16LE",'utf-16le'=>"UTF-16LE",'unicode-1-1-utf-8'=>"UTF-8",'unicode11utf8'=>"UTF-8",'unicode20utf8'=>"UTF-8",'utf-8'=>"UTF-8",'utf8'=>"UTF-8",'x-unicode20utf8'=>"UTF-8",'cp1250'=>"windows-1250",'windows-1250'=>"windows-1250",'x-cp1250'=>"windows-1250",'cp1251'=>"windows-1251",'windows-1251'=>"windows-1251",'x-cp1251'=>"windows-1251",'ansi_x3.4-1968'=>"windows-1252",'ascii'=>"windows-1252",'cp1252'=>"windows-1252",'cp819'=>"windows-1252",'csisolatin1'=>"windows-1252",'ibm819'=>"windows-1252",'iso-8859-1'=>"windows-1252",'iso-ir-100'=>"windows-1252",'iso8859-1'=>"windows-1252",'iso88591'=>"windows-1252",'iso_8859-1'=>"windows-1252",'iso_8859-1:1987'=>"windows-1252",'l1'=>"windows-1252",'latin1'=>"windows-1252",'us-ascii'=>"windows-1252",'windows-1252'=>"windows-1252",'x-cp1252'=>"windows-1252",'cp1253'=>"windows-1253",'windows-1253'=>"windows-1253",'x-cp1253'=>"windows-1253",'cp1254'=>"windows-1254",'csisolatin5'=>"windows-1254",'iso-8859-9'=>"windows-1254",'iso-ir-148'=>"windows-1254",'iso8859-9'=>"windows-1254",'iso88599'=>"windows-1254",'iso_8859-9'=>"windows-1254",'iso_8859-9:1989'=>"windows-1254",'l5'=>"windows-1254",'latin5'=>"windows-1254",'windows-1254'=>"windows-1254",'x-cp1254'=>"windows-1254",'cp1255'=>"windows-1255",'windows-1255'=>"windows-1255",'x-cp1255'=>"windows-1255",'cp1256'=>"windows-1256",'windows-1256'=>"windows-1256",'x-cp1256'=>"windows-1256",'cp1257'=>"windows-1257",'windows-1257'=>"windows-1257",'x-cp1257'=>"windows-1257",'cp1258'=>"windows-1258",'windows-1258'=>"windows-1258",'x-cp1258'=>"windows-1258",'dos-874'=>"windows-874",'iso-8859-11'=>"windows-874",'iso8859-11'=>"windows-874",'iso885911'=>"windows-874",'tis-620'=>"windows-874",'windows-874'=>"windows-874",'x-mac-cyrillic'=>"x-mac-cyrillic",'x-mac-ukrainian'=>"x-mac-cyrillic",'x-user-defined'=>"x-user-defined"]; const NAME_MAP = ['Big5'=>\MensBeam\Intl\Encoding\Big5::class,'EUC-JP'=>\MensBeam\Intl\Encoding\EUCJP::class,'EUC-KR'=>\MensBeam\Intl\Encoding\EUCKR::class,'gb18030'=>\MensBeam\Intl\Encoding\GB18030::class,'GBK'=>\MensBeam\Intl\Encoding\GBK::class,'IBM866'=>\MensBeam\Intl\Encoding\IBM866::class,'ISO-2022-JP'=>\MensBeam\Intl\Encoding\ISO2022JP::class,'ISO-8859-10'=>\MensBeam\Intl\Encoding\ISO885910::class,'ISO-8859-13'=>\MensBeam\Intl\Encoding\ISO885913::class,'ISO-8859-14'=>\MensBeam\Intl\Encoding\ISO885914::class,'ISO-8859-15'=>\MensBeam\Intl\Encoding\ISO885915::class,'ISO-8859-16'=>\MensBeam\Intl\Encoding\ISO885916::class,'ISO-8859-2'=>\MensBeam\Intl\Encoding\ISO88592::class,'ISO-8859-3'=>\MensBeam\Intl\Encoding\ISO88593::class,'ISO-8859-4'=>\MensBeam\Intl\Encoding\ISO88594::class,'ISO-8859-5'=>\MensBeam\Intl\Encoding\ISO88595::class,'ISO-8859-6'=>\MensBeam\Intl\Encoding\ISO88596::class,'ISO-8859-7'=>\MensBeam\Intl\Encoding\ISO88597::class,'ISO-8859-8'=>\MensBeam\Intl\Encoding\ISO88598::class,'ISO-8859-8-I'=>\MensBeam\Intl\Encoding\ISO88598I::class,'KOI8-R'=>\MensBeam\Intl\Encoding\KOI8R::class,'KOI8-U'=>\MensBeam\Intl\Encoding\KOI8U::class,'macintosh'=>\MensBeam\Intl\Encoding\Macintosh::class,'replacement'=>\MensBeam\Intl\Encoding\Replacement::class,'Shift_JIS'=>\MensBeam\Intl\Encoding\ShiftJIS::class,'UTF-16BE'=>\MensBeam\Intl\Encoding\UTF16BE::class,'UTF-16LE'=>\MensBeam\Intl\Encoding\UTF16LE::class,'UTF-8'=>\MensBeam\Intl\Encoding\UTF8::class,'windows-1250'=>\MensBeam\Intl\Encoding\Windows1250::class,'windows-1251'=>\MensBeam\Intl\Encoding\Windows1251::class,'windows-1252'=>\MensBeam\Intl\Encoding\Windows1252::class,'windows-1253'=>\MensBeam\Intl\Encoding\Windows1253::class,'windows-1254'=>\MensBeam\Intl\Encoding\Windows1254::class,'windows-1255'=>\MensBeam\Intl\Encoding\Windows1255::class,'windows-1256'=>\MensBeam\Intl\Encoding\Windows1256::class,'windows-1257'=>\MensBeam\Intl\Encoding\Windows1257::class,'windows-1258'=>\MensBeam\Intl\Encoding\Windows1258::class,'windows-874'=>\MensBeam\Intl\Encoding\Windows874::class,'x-mac-cyrillic'=>\MensBeam\Intl\Encoding\XMacCyrillic::class,'x-user-defined'=>\MensBeam\Intl\Encoding\XUserDefined::class]; @@ -20,6 +23,14 @@ abstract class Encoding { } } + public static function createEncoder(string $encodingLabel, bool $fatal = false): ?Encoder { + try { + return new Encoder($encodingLabel, $fatal); + } catch (EncoderException $e) { + return null; + } + } + public static function matchLabel(string $label): ?array { $label = strtolower(trim($label)); $name = self::LABEL_MAP[$label] ?? null; diff --git a/lib/Encoding/Encoder.php b/lib/Encoding/Encoder.php index b515aa7..75b3d4f 100644 --- a/lib/Encoding/Encoder.php +++ b/lib/Encoding/Encoder.php @@ -135,7 +135,7 @@ class Encoder { } return chr($codePoint); } elseif ($codePoint === 0xA5 || $codePoint === 0x203E) { - $ord = $codePoint = 0xA5 ? 0x5C : 0x7E; + $ord = $codePoint === 0xA5 ? 0x5C : 0x7E; if ($this->mode !== self::MODE_ROMAN) { return $this->modeSet(self::MODE_ROMAN, chr($ord)); } @@ -148,15 +148,15 @@ class Encoder { } $pointer = ISO2022JP::TABLE_POINTERS[$codePoint] ?? array_flip(ISO2022JP::TABLE_JIS0208)[$codePoint] ?? null; if (!is_null($pointer)) { - $lead = chr($pointer / 94 - 0x21); - $trail = chr($pointer % 94 - 0x21); + $lead = chr((int) ($pointer / 94) + 0x21); + $trail = chr(($pointer % 94) + 0x21); if ($this->mode !== self::MODE_JIS) { return $this->modeSet(self::MODE_JIS, $lead.$trail); } return $lead.$trail; } return $this->err($codePoint); - } + } } } diff --git a/lib/Encoding/ISO2022JP.php b/lib/Encoding/ISO2022JP.php index 3ccd5f6..dba68c7 100644 --- a/lib/Encoding/ISO2022JP.php +++ b/lib/Encoding/ISO2022JP.php @@ -6,7 +6,7 @@ declare(strict_types=1); namespace MensBeam\Intl\Encoding; -class ISO2022JP extends AbstractEncoding implements StatefulEncoding { +class ISO2022JP extends AbstractEncoding implements Encoding { const NAME = "ISO-2022-JP"; const LABELS = [ "csiso2022jp", @@ -143,10 +143,6 @@ class ISO2022JP extends AbstractEncoding implements StatefulEncoding { return $mode; } - public static function encode(array $codePoints, bool $fatal = true): string { - return ""; - } - protected function seekBack(int $distance): int { if ($this->dirtyEOF && $this->posByte === $this->lenByte) { list($this->modeMark, $this->mode) = array_pop($this->modeStack); diff --git a/tests/cases/Encoding/TestISO2022JP.php b/tests/cases/Encoding/TestISO2022JP.php index d8b22e7..cb8b7fd 100644 --- a/tests/cases/Encoding/TestISO2022JP.php +++ b/tests/cases/Encoding/TestISO2022JP.php @@ -7,6 +7,8 @@ declare(strict_types=1); namespace MensBeam\Intl\TestCase\Encoding; use MensBeam\Intl\Encoding\ISO2022JP; +use MensBeam\Intl\Encoding\Encoding; +use MensBeam\Intl\Encoding\EncoderException; class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { protected $testedClass = ISO2022JP::class; @@ -32,6 +34,52 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { public function provideCodePoints() { return [ +'U+0020 (HTML)' => [false, [0x20], "20"], +'U+0020 (fatal)' => [true, [0x20], "20"], +'U+005C (HTML)' => [false, [0x5C], "5C"], +'U+005C (fatal)' => [true, [0x5C], "5C"], +'U+007E (HTML)' => [false, [0x7E], "7E"], +'U+007E (fatal)' => [true, [0x7E], "7E"], +'U+00A5 (HTML)' => [false, [0xA5], "1B 28 4A 5C 1B 28 42"], +'U+00A5 (fatal)' => [true, [0xA5], "1B 28 4A 5C 1B 28 42"], +'U+203E (HTML)' => [false, [0x203E], "1B 28 4A 7E 1B 28 42"], +'U+203E (fatal)' => [true, [0x203E], "1B 28 4A 7E 1B 28 42"], +'U+FF61 (HTML)' => [false, [0xFF61], "1B 24 42 21 23 1B 28 42"], +'U+FF61 (fatal)' => [true, [0xFF61], "1B 24 42 21 23 1B 28 42"], +'U+FF9F (HTML)' => [false, [0xFF9F], "1B 24 42 21 2C 1B 28 42"], +'U+FF9F (fatal)' => [true, [0xFF9F], "1B 24 42 21 2C 1B 28 42"], +'U+2212 (HTML)' => [false, [0x2212], "1B 24 42 21 5D 1B 28 42"], +'U+2212 (fatal)' => [true, [0x2212], "1B 24 42 21 5D 1B 28 42"], +'U+2116 (HTML)' => [false, [0x2116], "1B 24 42 2D 62 1B 28 42"], +'U+2116 (fatal)' => [true, [0x2116], "1B 24 42 2D 62 1B 28 42"], +'U+FFE2 (HTML)' => [false, [0xFFE2], "1B 24 42 22 4C 1B 28 42"], +'U+FFE2 (fatal)' => [true, [0xFFE2], "1B 24 42 22 4C 1B 28 42"], +'U+00C6 (HTML)' => [false, [0xC6], "26 23 31 39 38 3B"], +'U+00C6 (fatal)' => [true, [0xC6], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], +'U+FFFD (HTML)' => [false, [0xFFFD], "26 23 36 35 35 33 33 3B"], +'U+FFFD (fatal)' => [true, [0xFFFD], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], +'Roman (HTML)' => [false, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"], +'Roman (fatal)' => [true, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"], +'Roman to ASCII (HTML)' => [false, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"], +'Roman to ASCII (fatal)' => [true, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"], +'Roman to error (HTML)' => [false, [0xA5, 0x80], "1B 28 4A 5C 26 23 31 32 38 3B 1B 28 42"], +'Roman to error (fatal)' => [true, [0xA5, 0x80], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], +'JIS (HTML)' => [false, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"], +'JIS (fatal)' => [true, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"], +'JIS to Roman (HTML)' => [false, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"], +'JIS to Roman (fatal)' => [true, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"], +'JIS to ASCII 1 (HTML)' => [false, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"], +'JIS to ASCII 1 (fatal)' => [true, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"], +'JIS to ASCII 2 (HTML)' => [false, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"], +'JIS to ASCII 2 (fatal)' => [true, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"], +'JIS to error (HTML)' => [false, [0x2116, 0x80], "1B 24 42 2D 62 1B 28 42 26 23 31 32 38 3B"], +'JIS to error (fatal)' => [true, [0x2116, 0x80], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], +'Escape characters (HTML)' => [false, [0x1B, 0xE, 0xF], "26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B"], +'Escape characters (fatal)' => [true, [0x1B, 0xE, 0xF], new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)], +'-1 (HTML)' => [false, [-1], new EncoderException("", Encoding::E_INVALID_CODE_POINT)], +'-1 (fatal)' => [true, [-1], new EncoderException("", Encoding::E_INVALID_CODE_POINT)], +'0x110000 (HTML)' => [false, [0x110000], new EncoderException("", Encoding::E_INVALID_CODE_POINT)], +'0x110000 (fatal)' => [true, [0x110000], new EncoderException("", Encoding::E_INVALID_CODE_POINT)], ]; } @@ -61,8 +109,6 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints * @covers MensBeam\Intl\Encoding\Encoder - * @covers MensBeam\Intl\Encoding\ISO2022JP::encode - * @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc */ public function testEncodeCodePoints(bool $fatal, $input, $exp) { return parent::testEncodeCodePoints($fatal, $input, $exp); @@ -70,8 +116,7 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { /** * @dataProvider provideCodePoints - * @covers MensBeam\Intl\Encoding\ISO2022JP::encode - * @covers MensBeam\Intl\Encoding\ISO2022JP::errEnc + * @coversNothing */ public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) { return parent::testEncodeCodePointsStatically($fatal, $input, $exp); diff --git a/tools/test-iso2022jp.html b/tools/test-iso2022jp.html index c481ba2..8e583eb 100644 --- a/tools/test-iso2022jp.html +++ b/tools/test-iso2022jp.html @@ -17,8 +17,29 @@ var sampleStrings = { 'Invalid bytes': "80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF", }; var sampleCharacters = { + 'U+0020': [0x20], + 'U+005C': [0x5C], + 'U+007E': [0x7E], + 'U+00A5': [0xA5], + 'U+203E': [0x203E], + 'U+FF61': [0xFF61], + 'U+FF9F': [0xFF9F], + 'U+2212': [0x2212], + 'U+2116': [0x2116], + 'U+FFE2': [0xFFE2], + 'U+00C6': [0xC6], + 'U+FFFD': [0xFFFD], + 'Roman': [0xA5, 0x20, 0x203E], + 'Roman to ASCII': [0xA5, 0x5C], + 'Roman to error': [0xA5, 0x80], + 'JIS': [0x2116, 0xFFE2, 0x2212], + 'JIS to Roman': [0x2116, 0xA5], + 'JIS to ASCII 1': [0x2116, 0x20], + 'JIS to ASCII 2': [0x2116, 0x5C], + 'JIS to error': [0x2116, 0x80], + 'Escape characters': [0x1B, 0xE, 0xF], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158 + '-1': [-1], + '0x110000': [0x110000], }; -var seekCodePoints = [ -]; diff --git a/tools/test.js b/tools/test.js index 8a34819..6c36c94 100644 --- a/tools/test.js +++ b/tools/test.js @@ -10,9 +10,9 @@ function encodeCodePoint(code, fatal) { return 'new EncoderException("", Encoding::E_INVALID_CODE_POINT)'; } else { var l = document.createElement("a"); - l.href = "http://example.com/?" + String.fromCodePoint(code) + "a"; + l.href = "http://example.com/?" + String.fromCodePoint(code) + "#"; var bytes = []; - let url = l.search.substr(1, l.search.length - 2); + let url = l.search.substr(1); for (let a = 0; a < url.length; a++) { if ((url.charAt(a) == "%" && url.substr(a, 6) == "%26%23") || url.charAt(a) == "&") { // character cannot be encoded @@ -32,10 +32,33 @@ function encodeCodePoint(code, fatal) { return bytes; } +function encodeCodePoints(codes, fatal) { + for (let a = 0; a < codes.length; a++) { + if (codes[a] < 0 || codes[a] > 0x10FFFF) { + return 'new EncoderException("", Encoding::E_INVALID_CODE_POINT)'; + } + } + var l = document.createElement("a"); + l.href = "http://example.com/?" + String.fromCodePoint(...codes) + "#"; + var bytes = []; + let url = decodeURIComponent(l.search.substr(1)); + if (fatal && url.indexOf("&#") > -1) { + return 'new EncoderException("", Encoding::E_UNAVAILABLE_CODE_POINT)'; + } + for (let a = 0; a < url.length; a++) { + bytes.push(url.charCodeAt(a).toString(16).padStart(2, "0").toUpperCase()); + } + return bytes; +} + function wrapCodePoint(code, fatal) { - var out = encodeCodePoint(code, fatal); + if (typeof code === "number") { + var out = encodeCodePoint(code, fatal); + } else { + var out = encodeCodePoints(code, fatal); + } if (Array.isArray(out)) { - return '"' + out.join(" ") + '"'; + return ('"' + out.join(" ") + '"').toUpperCase(); } else if (out.charAt(0) == "&") { return 'bin2hex("' + out + '")'; } else { @@ -72,9 +95,21 @@ if(typeof sampleStrings != 'undefined') { if(typeof sampleCharacters != 'undefined') { for (name in sampleCharacters) { let code = sampleCharacters[name]; - if (code > -1 && code % 1 == 0) code = "0x" + code.toString(16).toUpperCase(); - let line1 = "'" + name + " (HTML)' => [false, " + code + ", " + wrapCodePoint(code, false) + "],\n"; - let line2 = "'" + name + " (fatal)' => [true, " + code + ", " + wrapCodePoint(code, true) + "],\n"; + if (typeof code == "number" && code > -1 && code % 1 == 0) { + var displayCode = "0x" + code.toString(16).toUpperCase(); + } else if (typeof code !== "number") { + var displayCode = [...code]; + for (let a = 0; a < displayCode.length; a++) { + if (displayCode[a] > -1 && displayCode[a] % 1 == 0) { + displayCode[a] = "0x" + displayCode[a].toString(16).toUpperCase(); + } + } + displayCode = "[" + displayCode.join(", ") + "]"; + } else { + var displayCode = code; + } + let line1 = "'" + name + " (HTML)' => [false, " + displayCode + ", " + wrapCodePoint(code, false) + "],\n"; + let line2 = "'" + name + " (fatal)' => [true, " + displayCode + ", " + wrapCodePoint(code, true) + "],\n"; out.appendChild(document.createTextNode(line1)); out.appendChild(document.createTextNode(line2)); }