Second stab at Shift_JIS
- Decoder implemented, with correct table
- Modernized decoder; may have bugs
- Backwards seeker hopefully implemented, though it does not yet pass the fuzzer
This commit is contained in:
parent
b284056644
commit
9e812ffdf8
3 changed files with 96 additions and 31 deletions
File diff suppressed because one or more lines are too long
|
@ -136,6 +136,14 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
|
|||
return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Runs the inherited seek-back test over random data for this coder.
 *
 * The body simply forwards to the parent implementation and returns its
 * result. Presumably the override exists only so the annotation below
 * attributes coverage to the Shift_JIS class (confirm against the parent).
 *
 * @covers MensBeam\Intl\Encoding\ShiftJIS::seekBack
 */
public function testSeekBackOverRandomData() {
    return parent::testSeekBackOverRandomData();
}
|
||||
|
||||
public function provideCodePoints() {
|
||||
return [
|
||||
];
|
||||
|
|
|
@ -165,7 +165,34 @@ function eucjp(string $label) {
|
|||
|
||||
/**
 * Prints the Shift_JIS lookup tables as PHP constant declarations.
 *
 * The decoder table is the jis0208 index as rendered by
 * make_decoder_point_array(). The encoder table maps each code point to the
 * first pointer that yields it, after discarding pointers 8272 through 8835
 * inclusive, as required by the "index Shift_JIS pointer" definition in the
 * Encoding specification.
 *
 * @param string $label Passed through unchanged to read_index()
 */
function shiftjis(string $label) {
    $codes = make_decoder_point_array(read_index($label, "https://encoding.spec.whatwg.org/index-jis0208.txt"));
    // NOTE(review): the same literal is printed again below as TABLE_CODES_DEC;
    // confirm whether this first declaration is still wanted
    echo "const TABLE_CODES = $codes;\n";
    // NOTE(review): this evaluates text derived from a downloaded index file;
    // it is only as trustworthy as that download
    $table = eval("return $codes;");
    // remove the block of pointers between 8272 and 8835
    // see https://encoding.spec.whatwg.org#index-shift_jis-pointer
    foreach (range(8272, 8835) as $pointer) {
        unset($table[$pointer]);
    }
    // record the first pointer seen for each code point in a single pass;
    // this replaces one array_search() per code point, which was quadratic
    // (assumes the table values are integer code points -- TODO confirm)
    $first = [];
    foreach ($table as $pointer => $point) {
        if (!isset($first[$point])) {
            $first[$point] = $pointer;
        }
    }
    // emit entries in ascending code point order
    ksort($first);
    $enc = [];
    // $next is the key PHP would assign implicitly to the next array element
    $next = 0;
    foreach ($first as $point => $pointer) {
        // an explicit key is only needed where the code points are not consecutive
        $key = ($point === $next) ? "" : "$point=>";
        $next = $point + 1;
        $enc[] = "$key$pointer";
    }
    // compose the encoder table literal
    $enc = "[".implode(",", $enc)."]";
    echo "const TABLE_CODES_DEC = $codes;\n";
    echo "const TABLE_CODES_ENC = $enc;\n";
}
|
||||
|
||||
// generic helper functions
|
||||
|
|
Loading…
Reference in a new issue