J. King
4 years ago
80 changed files with 3068 additions and 627 deletions
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,20 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\Encoding; |
||||
|
|
||||
|
interface Coder {
    /** Error code used when a value outside the Unicode code point range is supplied */
    public const E_INVALID_CODE_POINT = 1;
    /** Error code used when a code point has no representation in the target encoding */
    public const E_UNAVAILABLE_CODE_POINT = 3;
    /** Error code used when an encoding label has no associated encoder */
    public const E_UNAVAILABLE_ENCODER = 4;

    /** Encodes a single code point and returns the resulting bytes
     *
     * @param int $codePoint The Unicode code point to encode; an exception is thrown for values below 0 or above 1114111
     * @param bool $fatal When true, a code point which cannot be represented causes an exception; when false an HTML character reference is emitted in its place
     * @return string The encoded bytes
     */
    public static function encode(int $codePoint, bool $fatal = true): string;
}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,321 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\Encoding; |
||||
|
|
||||
|
use MensBeam\Intl\Encoding as Matcher; |
||||
|
|
||||
|
/** Encodes Unicode code points into byte strings using any encoding for which an encoder is available
 *
 * The work is delegated to the static encode() method of the class
 * registered for the encoding's canonical name in self::CODERS.
 */
class Encoder {
    /** Canonical name of the only modal (stateful) encoding handled by this class */
    protected const MODAL_NAME = "ISO-2022-JP";
    /** Canonical encoding names mapped to the classes whose static encode() method implements them */
    protected const CODERS = [
        "UTF-8"          => UTF8::class,
        "Big5"           => Big5::class,
        "EUC-JP"         => EUCJP::class,
        "EUC-KR"         => EUCKR::class,
        "gb18030"        => GB18030::class,
        "GBK"            => GBK::class,
        "IBM866"         => IBM866::class,
        "ISO-2022-JP"    => ISO2022JP::class,
        "ISO-8859-2"     => ISO88592::class,
        "ISO-8859-3"     => ISO88593::class,
        "ISO-8859-4"     => ISO88594::class,
        "ISO-8859-5"     => ISO88595::class,
        "ISO-8859-6"     => ISO88596::class,
        "ISO-8859-7"     => ISO88597::class,
        "ISO-8859-8"     => ISO88598::class,
        "ISO-8859-8-I"   => ISO88598I::class,
        "ISO-8859-10"    => ISO885910::class,
        "ISO-8859-13"    => ISO885913::class,
        "ISO-8859-14"    => ISO885914::class,
        "ISO-8859-15"    => ISO885915::class,
        "ISO-8859-16"    => ISO885916::class,
        "KOI8-R"         => KOI8R::class,
        "KOI8-U"         => KOI8U::class,
        "macintosh"      => Macintosh::class,
        "Shift_JIS"      => ShiftJIS::class,
        "windows-1250"   => Windows1250::class,
        "windows-1251"   => Windows1251::class,
        "windows-1252"   => Windows1252::class,
        "windows-1253"   => Windows1253::class,
        "windows-1254"   => Windows1254::class,
        "windows-1255"   => Windows1255::class,
        "windows-1256"   => Windows1256::class,
        "windows-1257"   => Windows1257::class,
        "windows-1258"   => Windows1258::class,
        "windows-874"    => Windows874::class,
        "x-mac-cyrillic" => XMacCyrillic::class,
        "x-user-defined" => XUserDefined::class,
    ];

    /** @var string The canonical name of the target encoding, as reported by the label matcher */
    protected $name;
    /** @var bool Whether unencodable code points raise exceptions rather than being replaced with character references */
    protected $fatal = true;
    /** @var mixed Mode reference handed to the modal coder by encodeChar() and finalize(); starts out unset (null) */
    protected $mode;

    /** Constructs a new encoder for the specified $label
     *
     * @param string $label One of the encoding labels listed in the specification e.g. "utf-8", "Latin1", "shift_JIS"
     * @param bool $fatal If true (the default) exceptions will be thrown when a character cannot be represented in the target encoding; if false HTML character references will be substituted instead
     * @throws EncoderException with code Coder::E_UNAVAILABLE_ENCODER if the label is unknown or has no encoder
     *
     * @see https://encoding.spec.whatwg.org#names-and-labels
     */
    public function __construct(string $label, bool $fatal = true) {
        $l = Matcher::matchLabel($label);
        // a label is only usable if it matched, is flagged as encodable, and has a coder class to dispatch to
        $class = ($l && $l['encoder']) ? (self::CODERS[$l['name']] ?? null) : null;
        if ($class === null) {
            throw new EncoderException("Label '$label' does not have an encoder", Coder::E_UNAVAILABLE_ENCODER);
        }
        $this->name = $l['name'];
        $this->fatal = $fatal;
    }

    /** Encodes a series of code point numbers into a string
     *
     * The returned string is complete: for the modal ISO-2022-JP encoding the
     * terminating bytes are appended automatically. A mode local to the call
     * is used, so state carried by encodeChar() is neither read nor modified.
     *
     * @param iterable $codePoints An iterable set of integers representing code points in the Unicode range
     * @return string The encoded bytes
     */
    public function encode(iterable $codePoints): string {
        $out = "";
        if ($this->name === self::MODAL_NAME) {
            $mode = null;
            foreach ($codePoints as $codePoint) {
                $out .= ISO2022JP::encode($codePoint, $this->fatal, $mode);
            }
            // a null code point signals end-of-file to the modal coder
            return $out.ISO2022JP::encode(null, $this->fatal, $mode);
        }
        $class = self::CODERS[$this->name];
        foreach ($codePoints as $codePoint) {
            $out .= $class::encode($codePoint, $this->fatal);
        }
        return $out;
    }

    /** Encodes a single character into a string
     *
     * When using this method to encode a string, the finalize() method should be called to terminate the string
     *
     * @param int $codePoint An integer representing the Unicode code point number to encode
     * @return string The encoded bytes for the code point
     */
    public function encodeChar(int $codePoint): string {
        if ($this->name === self::MODAL_NAME) {
            // the modal coder updates the mode by reference so that it carries over to the next call
            return ISO2022JP::encode($codePoint, $this->fatal, $this->mode);
        }
        $class = self::CODERS[$this->name];
        return $class::encode($codePoint, $this->fatal);
    }

    /** Finalizes a string, returning any terminal bytes to append to the output
     *
     * For the ISO-2022-JP encoding, this method must be called after the last character is encoded to correctly encode a string; for other encodings this is a no-op
     *
     * @return string The terminal bytes, or an empty string for non-modal encodings
     */
    public function finalize(): string {
        if ($this->name !== self::MODAL_NAME) {
            return "";
        }
        return ISO2022JP::encode(null, $this->fatal, $this->mode);
    }
}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,17 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\Encoding; |
||||
|
|
||||
|
interface ModalCoder {
    /** Encodes a single code point, or signals end-of-file, and returns the resulting bytes
     *
     * @param int|null $codePoint The Unicode code point to encode, or null to signal end-of-file; an exception is thrown for integers below 0 or above 1114111
     * @param bool $fatal When true, a code point which cannot be represented causes an exception; when false an HTML character reference is emitted in its place
     * @param mixed &$mode A reference which tracks the current encoder mode. Pass an uninitialized variable on the first call and the same variable on every subsequent call
     * @return string The encoded bytes
     */
    public static function encode(?int $codePoint, bool $fatal = true, &$mode = null): string;
}
@ -0,0 +1,125 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\Encoding; |
||||
|
|
||||
|
/** Decoder for the "replacement" encoding
 *
 * Any non-empty input decodes to exactly one U+FFFD character (or raises an
 * exception in fatal mode); empty input decodes to nothing.
 */
class Replacement implements Decoder {
    public const NAME = "replacement";
    public const LABELS = [
        "csiso2022kr",
        "hz-gb-2312",
        "iso-2022-cn",
        "iso-2022-cn-ext",
        "iso-2022-kr",
        "replacement",
    ];

    /** @var int Length of the input string in bytes */
    protected $len = 0;
    /** @var bool Whether the single output character has already been consumed */
    protected $done = false;
    /** @var bool Whether decoding raises an exception instead of yielding U+FFFD */
    protected $fatal = false;

    /** @var int Set to 1 once the single character has been consumed via nextChar() or nextCode() */
    public $posErr = 0;

    /** Prepares a decoder for $string
     *
     * Only the byte length of the string is retained; $allowSurrogates is accepted but not used by this decoder.
     */
    public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false) {
        $this->fatal = $fatal;
        $this->len = strlen($string);
    }

    /** Returns the byte position: the full input length once the character is consumed, otherwise 0 */
    public function posByte(): int {
        if ($this->done) {
            return $this->len;
        }
        return 0;
    }

    /** Returns the character position: 1 once the character is consumed, otherwise 0 */
    public function posChar(): int {
        if ($this->done) {
            return 1;
        }
        return 0;
    }

    /** Consumes and returns the next character, or an empty string at end of input */
    public function nextChar(): string {
        if ($this->eof()) {
            return "";
        }
        try {
            $char = $this->peekChar();
        } finally {
            // the input counts as consumed even when peeking threw in fatal mode
            $this->done = true;
            $this->posErr = 1;
        }
        return $char;
    }

    /** Consumes and returns the next code point, or false at end of input */
    public function nextCode() {
        if ($this->eof()) {
            return false;
        }
        try {
            $code = $this->peekCode()[0];
        } finally {
            // the input counts as consumed even when peeking threw in fatal mode
            $this->done = true;
            $this->posErr = 1;
        }
        return $code;
    }

    /** Moves forward or backward by up to $distance characters and returns the distance which could not be covered */
    public function seek(int $distance): int {
        if ($distance > 0 && !$this->eof()) {
            // there is at most one character to step over
            $distance--;
            $this->nextCode();
        } elseif ($distance < 0 && $this->eof()) {
            // stepping back from the end returns to the start
            $distance++;
            $this->rewind();
        }
        return $distance;
    }

    /** Returns the decoder to the start of the input */
    public function rewind(): void {
        $this->done = false;
    }

    /** Returns the upcoming character without consuming it; any positive $num yields at most the one character */
    public function peekChar(int $num = 1): string {
        if ($num <= 0 || $this->eof()) {
            return "";
        }
        if ($this->fatal) {
            throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
        }
        return "\u{FFFD}";
    }

    /** Returns the upcoming code point in an array without consuming it; any positive $num yields at most the one code point */
    public function peekCode(int $num = 1): array {
        if ($num <= 0 || $this->eof()) {
            return [];
        }
        if ($this->fatal) {
            throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
        }
        return [0xFFFD];
    }

    /** Returns the length of the input in bytes */
    public function lenByte(): int {
        return $this->len;
    }

    /** Returns the length of the output in characters: 1 for non-empty input, otherwise 0 */
    public function lenChar(): int {
        return ($this->len > 0) ? 1 : 0;
    }

    /** Reports whether there is nothing left to decode */
    public function eof(): bool {
        return $this->len === 0 || $this->done;
    }

    /** Generates the remaining characters, keyed by character index */
    public function chars(): \Generator {
        if ($this->eof()) {
            return;
        }
        yield 0 => $this->nextChar();
    }

    /** Generates the remaining code points, keyed by character index */
    public function codes(): \Generator {
        if ($this->eof()) {
            return;
        }
        yield 0 => $this->nextCode();
    }
}
File diff suppressed because one or more lines are too long
@ -1,18 +0,0 @@ |
|||||
<?php |
|
||||
/** @license MIT |
|
||||
* Copyright 2018 J. King et al. |
|
||||
* See LICENSE and AUTHORS files for details */ |
|
||||
|
|
||||
declare(strict_types=1); |
|
||||
namespace MensBeam\Intl\Encoding; |
|
||||
|
|
||||
interface StatefulEncoding extends Encoding {
    /** Encodes a whole sequence of code points and returns the resulting bytes
     *
     * An exception is thrown if any element of $codePoints is below 0 or above 1114111.
     *
     * @param array $codePoints The Unicode code points to encode
     * @param bool $fatal When true, any code point which cannot be represented causes an exception; when false HTML character references are emitted instead
     * @return string The encoded bytes
     */
    public static function encode(array $codePoints, bool $fatal = true): string;
}
|
@ -1,18 +0,0 @@ |
|||||
<?php |
|
||||
/** @license MIT |
|
||||
* Copyright 2018 J. King et al. |
|
||||
* See LICENSE and AUTHORS files for details */ |
|
||||
|
|
||||
declare(strict_types=1); |
|
||||
namespace MensBeam\Intl\Encoding; |
|
||||
|
|
||||
interface StatelessEncoding extends Encoding {
    /** Encodes a single code point and returns the resulting bytes
     *
     * An exception is thrown if $codePoint is below 0 or above 1114111.
     *
     * @param int $codePoint The Unicode code point to encode
     * @param bool $fatal When true, a code point which cannot be represented causes an exception; when false an HTML character reference is emitted instead
     * @return string The encoded bytes
     */
    public static function encode(int $codePoint, bool $fatal = true): string;
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,263 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\TestCase\Encoding; |
||||
|
|
||||
|
use MensBeam\Intl\Encoding\ISO2022JP; |
||||
|
use MensBeam\Intl\Encoding\Coder; |
||||
|
use MensBeam\Intl\Encoding\EncoderException; |
||||
|
|
||||
|
/** Tests for the ISO-2022-JP coder and decoder; most tests delegate to the shared parent implementations */
class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
    protected $testedClass = ISO2022JP::class;
    /*
        Layout of the seek string:

        Char 0 U+007A   (1 byte)  Offset 0
        Esc: Katakana   (3 bytes) Offset 1
        Char 1 U+FF9C   (1 byte)  Offset 4
        Char 2 U+FF9F   (1 byte)  Offset 5
        Esc: Double-byte (3 bytes) Offset 6
        Char 3 U+79FB   (2 bytes) Offset 9
        Char 4 U+67B8   (2 bytes) Offset 11
        Char 5 U+9B91   (2 bytes) Offset 13
        Esc: ASCII      (3 bytes) Offset 15
        Char 6 U+007E   (1 byte)  Offset 18
        Esc: Roman      (3 bytes) Offset 19
        End of string at char 7, offset 22
    */
    protected $seekString = "7A 1B2849 5C 5F 1B2440 305C 5B4E 723A 1B2842 7E 1B284A";
    protected $seekCodes = [0x7A, 0xFF9C, 0xFF9F, 0x79FB, 0x67B8, 0x9B91, 0x7E];
    protected $seekOffsets = [0, 1, 5, 6, 11, 13, 15, 19];
    /* An invalid byte placed between two null characters */
    protected $brokenChar = "00 FF 00";

    /** Supplies encoder cases as [fatal flag, code points, expected hex bytes or expected exception] */
    public function provideCodePoints() {
        return [
            'U+0020 (HTML)'             => [false, [0x20], "20"],
            'U+0020 (fatal)'            => [true,  [0x20], "20"],
            'U+005C (HTML)'             => [false, [0x5C], "5C"],
            'U+005C (fatal)'            => [true,  [0x5C], "5C"],
            'U+007E (HTML)'             => [false, [0x7E], "7E"],
            'U+007E (fatal)'            => [true,  [0x7E], "7E"],
            'U+00A5 (HTML)'             => [false, [0xA5], "1B 28 4A 5C 1B 28 42"],
            'U+00A5 (fatal)'            => [true,  [0xA5], "1B 28 4A 5C 1B 28 42"],
            'U+203E (HTML)'             => [false, [0x203E], "1B 28 4A 7E 1B 28 42"],
            'U+203E (fatal)'            => [true,  [0x203E], "1B 28 4A 7E 1B 28 42"],
            'U+FF61 (HTML)'             => [false, [0xFF61], "1B 24 42 21 23 1B 28 42"],
            'U+FF61 (fatal)'            => [true,  [0xFF61], "1B 24 42 21 23 1B 28 42"],
            'U+FF9F (HTML)'             => [false, [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
            'U+FF9F (fatal)'            => [true,  [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
            'U+2212 (HTML)'             => [false, [0x2212], "1B 24 42 21 5D 1B 28 42"],
            'U+2212 (fatal)'            => [true,  [0x2212], "1B 24 42 21 5D 1B 28 42"],
            'U+2116 (HTML)'             => [false, [0x2116], "1B 24 42 2D 62 1B 28 42"],
            'U+2116 (fatal)'            => [true,  [0x2116], "1B 24 42 2D 62 1B 28 42"],
            'U+FFE2 (HTML)'             => [false, [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
            'U+FFE2 (fatal)'            => [true,  [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
            'U+00C6 (HTML)'             => [false, [0xC6], "26 23 31 39 38 3B"],
            'U+00C6 (fatal)'            => [true,  [0xC6], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'U+FFFD (HTML)'             => [false, [0xFFFD], "26 23 36 35 35 33 33 3B"],
            'U+FFFD (fatal)'            => [true,  [0xFFFD], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'Roman (HTML)'              => [false, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
            'Roman (fatal)'             => [true,  [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
            'Roman to ASCII (HTML)'     => [false, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
            'Roman to ASCII (fatal)'    => [true,  [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
            'Roman to error (HTML)'     => [false, [0xA5, 0x80], "1B 28 4A 5C 26 23 31 32 38 3B 1B 28 42"],
            'Roman to error (fatal)'    => [true,  [0xA5, 0x80], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'JIS (HTML)'                => [false, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
            'JIS (fatal)'               => [true,  [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
            'JIS to Roman (HTML)'       => [false, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
            'JIS to Roman (fatal)'      => [true,  [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
            'JIS to ASCII 1 (HTML)'     => [false, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
            'JIS to ASCII 1 (fatal)'    => [true,  [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
            'JIS to ASCII 2 (HTML)'     => [false, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
            'JIS to ASCII 2 (fatal)'    => [true,  [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
            'JIS to error 1 (HTML)'     => [false, [0x2116, 0x80], "1B 24 42 2D 62 1B 28 42 26 23 31 32 38 3B"],
            'JIS to error 1 (fatal)'    => [true,  [0x2116, 0x80], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'JIS to error 2 (HTML)'     => [false, [0x2116, 0x1B], "1B 24 42 2D 62 1B 28 42 26 23 36 35 35 33 33 3B"],
            'JIS to error 2 (fatal)'    => [true,  [0x2116, 0x1B], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'Escape characters (HTML)'  => [false, [0x1B, 0xE, 0xF], "26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B"],
            'Escape characters (fatal)' => [true,  [0x1B, 0xE, 0xF], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            '-1 (HTML)'                 => [false, [-1], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '-1 (fatal)'                => [true,  [-1], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (HTML)'           => [false, [0x110000], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (fatal)'          => [true,  [0x110000], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
        ];
    }

    /** Supplies decoder cases as [input hex bytes, expected code points] */
    public function provideStrings() {
        return [
            'empty string'        => ["", []],
            'Implied ASCII mode'  => ["00 30 5C 7E 21 5F", [0, 48, 92, 126, 33, 95]],
            'Explicit ASCII mode' => ["1B2842 00 30 5C 7E 21 5F", [0, 48, 92, 126, 33, 95]],
            'Roman mode'          => ["1B284A 00 30 5C 7E 21 5F", [0, 48, 165, 8254, 33, 95]],
            'Katakana mode'       => ["1B2849 00 30 5C 7E 21 5F", [65533, 65392, 65436, 65533, 65377, 65439]],
            'Double-byte mode 1'  => ["1B2440 00 305C 7E21 5F", [65533, 31227, 65533, 65533]],
            'Double-byte mode 2'  => ["1B2442 00 305C 7E21 5F", [65533, 31227, 65533, 65533]],
            'Multiple modes'      => ["5C 1B2849 21 1B2440 305C 1B284A 5C 1B2842 5C", [92, 65377, 31227, 165, 92]],
            'Double escape'       => ["1B2849 1B2842 5C", [65533, 92]],
            'Triple escape'       => ["1B2849 1B2842 1B284A 5C", [65533, 65533, 165]],
            'Trailing escape'     => ["20 1B284A 30 33 1B2849", [32, 48, 51]],
            'Truncated escape 1'  => ["1B", [65533]],
            'Truncated escape 2'  => ["1B28", [65533, 40]],
            'Truncated escape 3'  => ["1B2820", [65533, 40, 32]],
            'Truncated escape 4'  => ["1B2020", [65533, 32, 32]],
            'Invalid escape 1'    => ["1B2840", [65533, 40, 64]],
            'Invalid escape 2'    => ["1B244A", [65533, 36, 74]],
            'Invalid bytes'       => ["80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF", [65533, 65533, 65533, 65533, 65533, 65533, 65533, 65533, 65533]],
        ];
    }

    /**
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\Encoder
     */
    public function testEncodeCodePoints(bool $fatal, $input, $exp) {
        return parent::testEncodeCodePoints($fatal, $input, $exp);
    }

    /**
     * Feeds each code point to the static coder, then signals end-of-file, and compares the hex of the combined output
     *
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\ISO2022JP::encode
     */
    public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
        if (!($exp instanceof \Throwable)) {
            // expected bytes are written as spaced upper-case hex; normalize to what bin2hex() produces
            $exp = strtolower(str_replace(" ", "", $exp));
        } else {
            $this->expectException(get_class($exp));
            $this->expectExceptionCode($exp->getCode());
        }
        $bytes = "";
        // $mode is deliberately left uninitialized for the first call; the coder maintains it by reference
        foreach ($input as $codePoint) {
            $bytes .= ISO2022JP::encode($codePoint, $fatal, $mode);
        }
        $bytes .= ISO2022JP::encode(null, $fatal, $mode);
        $this->assertSame($exp, bin2hex($bytes));
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::__construct
     * @covers MensBeam\Intl\Encoding\ISO2022JP::nextCode
     * @covers MensBeam\Intl\Encoding\ISO2022JP::modeSet
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::__construct
     * @covers MensBeam\Intl\Encoding\ISO2022JP::nextChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::modeSet
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seekBack
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seek
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::rewind
     */
    public function testSeekThroughAString() {
        return parent::testSeekThroughAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::eof
     */
    public function testTraversePastTheEndOfAString() {
        return parent::testTraversePastTheEndOfAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::peekChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testPeekAtCharacters() {
        return parent::testPeekAtCharacters();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::peekCode
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testPeekAtCodePoints() {
        return parent::testPeekAtCodePoints();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::lenChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::lenByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::errDec
     */
    public function testReplacementModes() {
        return parent::testReplacementModes();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::rewind
     * @covers MensBeam\Intl\Encoding\ISO2022JP::chars
     * @covers MensBeam\Intl\Encoding\ISO2022JP::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @coversNothing
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seekBack
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }

    /**
     * Decodes each listed byte sequence on its own and checks that it yields exactly one expected code point
     *
     * @group optional
     */
    public function testPedanticallyDecodeSingleCharactersAsCodePoint() {
        // each entry pairs a list of hex inputs with a parallel list of expected code points; none are defined yet
        $series = [
        ];
        foreach ($series as $case) {
            $sequences = $case[0];
            $expected = $case[1];
            foreach ($sequences as $index => $input) {
                $decoderClass = $this->testedClass;
                $decoder = new $decoderClass(hex2bin($input));
                $exp = $expected[$index];
                $this->assertSame($exp, $decoder->nextCode(), "Sequence $input did not decode to $exp.");
                $this->assertFalse($decoder->nextCode(), "Sequence $input did not end after one character");
            }
        }
    }
}
@ -0,0 +1,201 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2018 J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\Intl\TestCase\Encoding; |
||||
|
|
||||
|
use MensBeam\Intl\Encoding\Replacement; |
||||
|
use MensBeam\Intl\Encoding\DecoderException; |
||||
|
|
||||
|
/**
 * Tests for the Replacement decoder.
 *
 * Many tests simply delegate to the shared DecoderTest implementation; the
 * overrides carry the data provider and coverage annotations for this class.
 */
class TestReplacement extends \MensBeam\Intl\Test\DecoderTest {
    protected $testedClass = Replacement::class;

    /**
     * Supplies labelled samples as [input, expected code points].
     *
     * Every non-empty sample is expected to produce a single U+FFFD.
     * NOTE(review): inputs look like hex-encoded byte sequences, as in the
     * other fixtures; confirm against how the parent tests consume them.
     */
    public function provideStrings() {
        return [
            // control samples
            'empty string'       => ["", []],
            'Arbitrary string 1' => ["20", [0xFFFD]],
            'Arbitrary string 2' => ["64 8B 20 00 FF A5", [0xFFFD]],
        ];
    }

    /**
     * Delegates to the shared parent test.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * Delegates to the shared parent test.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * Delegates to the shared parent test.
     *
     * The unusual capitalisation of the method name matches the parent method
     * being overridden and must not be "corrected" here.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * Placeholder that asserts a tautology instead of calling the parent.
     *
     * NOTE(review): presumably the parent's seek test does not apply to this
     * decoder; confirm the reason before changing.
     *
     * @coversNothing
     */
    public function testSeekThroughAString() {
        $this->assertTrue(true);
    }

    /**
     * Seeking past the end reports EOF and leaves the positions at 1/1,
     * including when the seek is repeated.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     * @covers MensBeam\Intl\Encoding\Replacement::eof
     */
    public function testTraversePastTheEndOfAString() {
        $d = new Replacement("a");
        $this->assertFalse($d->eof());
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        $d->seek(1);
        $this->assertTrue($d->eof());
        $this->assertSame(1, $d->posChar());
        $this->assertSame(1, $d->posByte());

        // A second seek from the end must not move either position.
        $d->seek(1);
        $this->assertTrue($d->eof());
        $this->assertSame(1, $d->posChar());
        $this->assertSame(1, $d->posByte());
    }

    /**
     * Peeking at characters returns at most one U+FFFD and never moves the
     * character or byte position.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCharacters() {
        $d = new Replacement("A");
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Asking for far more characters than exist still yields just one.
        $this->assertSame("\u{FFFD}", $d->peekChar(2112));
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Zero and negative counts yield nothing.
        $this->assertSame("", $d->peekChar(0));
        $this->assertSame("", $d->peekChar(-2112));
    }

    /**
     * Peeking at code points returns at most one U+FFFD and never moves the
     * character or byte position.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCodePoints() {
        $d = new Replacement("A");
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Asking for far more code points than exist still yields just one.
        $this->assertSame([0xFFFD], $d->peekCode(2112));
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Zero and negative counts yield nothing.
        $this->assertSame([], $d->peekCode(0));
        $this->assertSame([], $d->peekCode(-2112));
    }

    /**
     * Delegates to the shared parent test.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::lenChar
     * @covers MensBeam\Intl\Encoding\Replacement::lenByte
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * With the second constructor argument set to true, every read must raise
     * a DecoderException. Peeks must leave the positions and error counter
     * untouched, while reads that advance consume the whole six-byte input as
     * one character.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testReplacementModes() {
        $d = new Replacement("VVVVVV", true);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Peeking a code point fails without recording an error position.
        try {
            $p = $d->peekCode();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(0, $d->posErr);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Reading a code point fails and advances past the entire input.
        try {
            $p = $d->nextCode();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(1, $d->posChar());
        $this->assertSame(6, $d->posByte());

        $d->rewind();
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Peeking a character after rewinding fails; posErr is still 1.
        try {
            $p = $d->peekChar();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());

        // Reading a character fails and advances past the entire input.
        try {
            $p = $d->nextChar();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(1, $d->posChar());
        $this->assertSame(6, $d->posByte());
    }

    /**
     * Delegates to the shared parent test.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::chars
     * @covers MensBeam\Intl\Encoding\Replacement::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * Delegates to the shared parent test.
     *
     * The last parameter is declared explicitly nullable: relying on "= null"
     * to imply nullability is deprecated as of PHP 8.4, and ?array accepts
     * exactly the same values.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * Delegates to the shared parent test.
     *
     * @coversNothing
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }
}
File diff suppressed because one or more lines are too long
@ -0,0 +1,57 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<meta charset=euc-jp> |
||||
|
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data --> |
||||
|
<script> |
||||
|
var sampleStrings = { |
||||
|
'empty string': "", |
||||
|
// sanity checks |
||||
|
'sanity check': "40", |
||||
|
'former ASCII deviations': "5C 7E", |
||||
|
'changed multibyte index': "A1DD", |
||||
|
// JIS X 0201 |
||||
|
'JIS X 0201 range': "8EA1 8EDF", |
||||
|
'JIS X 0201 bogus range': "8EA0 8EE0", |
||||
|
'JIS X 0201 truncated character 1': "8E", |
||||
|
'JIS X 0201 truncated character 2': "8E 20", |
||||
|
'JIS X 0201 truncated character 3': "8E FF", |
||||
|
// JIS X 0212 |
||||
|
'JIS X 0212 assigned range': "8FA2AF 8FEDE3", |
||||
|
'JIS X 0212 total range': "8FA1A1 8FFEFE", |
||||
|
'JIS X 0212 bogus range 1': "8FA0A1 8FFFFE", |
||||
|
'JIS X 0212 bogus range 2': "8FA1A0 8FFEFF", |
||||
|
'JIS X 0212 truncated character 1': "8FA2", |
||||
|
'JIS X 0212 truncated character 2': "8FA2 20", |
||||
|
'JIS X 0212 truncated character 3': "8FA2 FF", |
||||
|
// JIS X 0208 |
||||
|
'JIS X 0208 assigned range': "A1A1 FCFE", |
||||
|
'JIS X 0208 total range': "A1A1 FEFE", |
||||
|
'JIS X 0208 bogus range': "A1A0 A0FE", |
||||
|
'JIS X 0208 truncated character 1': "A1", |
||||
|
'JIS X 0208 truncated character 2': "A1 20", |
||||
|
'JIS X 0208 truncated character 3': "A1 FF", |
||||
|
}; |
||||
|
var sampleCharacters = { |
||||
|
'U+0064': 0x64, |
||||
|
'U+00A5': 0xA5, |
||||
|
'U+203E': 0x203E, |
||||
|
'U+3088': 0x3088, |
||||
|
'U+FF96': 0xFF96, |
||||
|
'U+2212': 0x2212, |
||||
|
'U+00E6': 0xE6, |
||||
|
'U+FFE2': 0xFFE2, |
||||
|
'U+2116': 0x2116, |
||||
|
'-1': -1, |
||||
|
'0x110000': 0x110000, |
||||
|
}; |
||||
|
var seekCodePoints = [ |
||||
|
0x007A, |
||||
|
0xFF96, |
||||
|
0x3088, |
||||
|
0xFF0D, |
||||
|
0x005C, |
||||
|
0xFF9B, |
||||
|
/* This code point is not encodable, so its bytes must be entered manually as 8FB0EF */ |
||||
|
0x4F58, |
||||
|
]; |
||||
|
</script> |
||||
|
<script src="test.js"></script> |
@ -0,0 +1,46 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<meta charset=iso-2022-jp> |
||||
|
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data --> |
||||
|
<script> |
||||
|
var sampleStrings = { |
||||
|
'empty string': "", |
||||
|
'Implied ASCII mode': "00 30 5C 7E 21 5F", |
||||
|
'Explicit ASCII mode': "1B2842 00 30 5C 7E 21 5F", |
||||
|
'Roman mode': "1B284A 00 30 5C 7E 21 5F", |
||||
|
'Katakana mode': "1B2849 00 30 5C 7E 21 5F", |
||||
|
'Double-byte mode 1': "1B2440 00 30 5C 7E 21 5F", |
||||
|
'Double-byte mode 2': "1B2442 00 30 5C 7E 21 5F", |
||||
|
'Multiple modes': "5C 1B2849 21 1B2440 305C 1B284A 5C 1B2842 5C", |
||||
|
'Double escape': "1B2849 1B2842 5C", |
||||
|
'Triple escape': "1B2849 1B2842 1B284A 5C", |
||||
|
'Trailing escape': "20 1B284A 30 33 1B2849", |
||||
|
'Invalid bytes': "80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF", |
||||
|
}; |
||||
|
var sampleCharacters = { |
||||
|
'U+0020': [0x20], |
||||
|
'U+005C': [0x5C], |
||||
|
'U+007E': [0x7E], |
||||
|
'U+00A5': [0xA5], |
||||
|
'U+203E': [0x203E], |
||||
|
'U+FF61': [0xFF61], |
||||
|
'U+FF9F': [0xFF9F], |
||||
|
'U+2212': [0x2212], |
||||
|
'U+2116': [0x2116], |
||||
|
'U+FFE2': [0xFFE2], |
||||
|
'U+00C6': [0xC6], |
||||
|
'U+FFFD': [0xFFFD], |
||||
|
'Roman': [0xA5, 0x20, 0x203E], |
||||
|
'Roman to ASCII': [0xA5, 0x5C], |
||||
|
'Roman to error': [0xA5, 0x80], |
||||
|
'JIS': [0x2116, 0xFFE2, 0x2212], |
||||
|
'JIS to Roman': [0x2116, 0xA5], |
||||
|
'JIS to ASCII 1': [0x2116, 0x20], |
||||
|
'JIS to ASCII 2': [0x2116, 0x5C], |
||||
|
'JIS to error 1': [0x2116, 0x80], |
||||
|
'JIS to error 2': [0x2116, 0x1B], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158 |
||||
|
'Escape characters': [0x1B, 0xE, 0xF], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158 |
||||
|
'-1': [-1], |
||||
|
'0x110000': [0x110000], |
||||
|
}; |
||||
|
</script> |
||||
|
<script src="test.js"></script> |
@ -0,0 +1,42 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<meta charset=shift_jis> |
||||
|
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data --> |
||||
|
<script> |
||||
|
var sampleStrings = { |
||||
|
'empty string': "", |
||||
|
'sanity check': "40", |
||||
|
'former ASCII deviations': "5C 7E", |
||||
|
'JIS X 0201 range': "A1 DF", |
||||
|
'EUDC range': "F040 F9FC", |
||||
|
'JIS X 0208 assigned range': "8140 FC4B", |
||||
|
'JIS X 0208 total range': "8140 FCFC", |
||||
|
'JIS X 0208 truncated character 1': "81", |
||||
|
'JIS X 0208 truncated character 2': "81 20", |
||||
|
'JIS X 0208 truncated character 3': "81 FF", |
||||
|
}; |
||||
|
var sampleCharacters = { |
||||
|
'U+0064': 0x64, |
||||
|
'U+00A5': 0xA5, |
||||
|
'U+203E': 0x203E, |
||||
|
'U+3088': 0x3088, |
||||
|
'U+FF96': 0xFF96, |
||||
|
'U+2212': 0x2212, |
||||
|
'U+00E6': 0xE6, |
||||
|
'U+FFE2': 0xFFE2, |
||||
|
'U+2116': 0x2116, |
||||
|
'U+E000': 0xE000, |
||||
|
'-1': -1, |
||||
|
'0x110000': 0x110000, |
||||
|
}; |
||||
|
var seekCodePoints = [ |
||||
|
0x007A, |
||||
|
0xFF96, |
||||
|
0x3088, |
||||
|
0xFF0D, |
||||
|
0x005C, |
||||
|
0xFF9B, |
||||
|
/* This code point is not encodable, so its bytes must be entered manually as F040 */ |
||||
|
0xE000, |
||||
|
]; |
||||
|
</script> |
||||
|
<script src="test.js"></script> |
Loading…
Reference in new issue