Browse Source

Initial implementation of EUC-JP

multi-byte
J. King 6 years ago
parent
commit
8dfb1ba984
  1. 156
      lib/Encoding/EUCJP.php
  2. 150
      tests/cases/Encoding/TestEUCJP.php
  3. 1
      tests/phpunit.xml
  4. 9
      tools/mkindex.php
  5. 6
      tools/mktest.php
  6. 16
      tools/test-eucjp.html

156
lib/Encoding/EUCJP.php

File diff suppressed because one or more lines are too long

150
tests/cases/Encoding/TestEUCJP.php

File diff suppressed because one or more lines are too long

1
tests/phpunit.xml

@ -23,6 +23,7 @@
<file>cases/Encoding/TestUTF16BE.php</file>
<file>cases/Encoding/TestSingleByte.php</file>
<file>cases/Encoding/TestXUserDefined.php</file>
<file>cases/Encoding/TestEUCJP.php</file>
<file>cases/Encoding/TestGB18030.php</file>
<file>cases/Encoding/TestBig5.php</file>
<file>cases/Encoding/TestEUCKR.php</file>

9
tools/mkindex.php

@ -1,7 +1,7 @@
<?php
$labels = [
'big5' => "big5",
//'euc-jp' => "eucjp",
'euc-jp' => "eucjp",
'euc-kr' => "euckr",
'gb18030' => "gb18030",
'ibm866' => "single_byte",
@ -130,6 +130,13 @@ function euckr(string $label) {
echo "const TABLE_CODES = $codes;\n";
}
/**
 * Prints the constant declarations for the two EUC-JP lookup tables.
 *
 * Each WHATWG index (jis0208, jis0212) is fetched with read_index(), run
 * through make_decoder_point_array(), and echoed as a TABLE_JIS0208 or
 * TABLE_JIS0212 constant declaration.
 *
 * @param string $label The encoding label; not referenced by this generator
 */
function eucjp(string $label) {
    $sources = [
        'jis0208' => "https://encoding.spec.whatwg.org/index-jis0208.txt",
        'jis0212' => "https://encoding.spec.whatwg.org/index-jis0212.txt",
    ];
    // build every table first; nothing is printed until both are ready
    $tables = [];
    foreach ($sources as $index => $url) {
        $tables[$index] = make_decoder_point_array(read_index($index, $url));
    }
    echo "const TABLE_JIS0208 = " . $tables['jis0208'] . ";\n";
    echo "const TABLE_JIS0212 = " . $tables['jis0212'] . ";\n";
}
// generic helper functions
function read_index(string $label, string $url): array {

6
tools/mktest.php

@ -55,6 +55,12 @@ function make_test(string $label, string $url): array {
$code = hexdec($match[1]);
if ($label=="gb18030" && $bytes=="A8BC") { // this test is incorrect or out of date; both Vivaldi and Firefox yield code point 7743
$code = 7743;
} elseif ($label=="euc-jp") { // these tests are out of date
if ($bytes == "5C") {
$code = 92;
} elseif ($bytes == "7E") {
$code = 126;
}
}
// convert the code point to decimal
$out[] = $code;

16
tools/test-eucjp.html

@ -0,0 +1,16 @@
<!DOCTYPE html>
<meta charset=euc-jp>
<!-- Sample-data page for the EUC-JP encoding. The page declares its own
     charset as euc-jp; the globals defined below are presumably read by
     test.js, which is loaded at the end (confirm against test.js). -->
<script>
// Named sample inputs. Values look like space-separated hexadecimal byte
// sequences (e.g. "5C 7E" is two single-byte characters) -- TODO confirm
// the exact format expected by test.js.
var sampleStrings = {
'empty string': "",
// valid single characters
'sanity check': "40",
'former ASCII deviations': "5C 7E",
'problem': "A1DD",
};
// No sample characters are defined yet for this encoding.
var sampleCharacters = {
};
// No seek code points are defined yet for this encoding.
var seekCodePoints = [
];
</script>
<script src="test.js"></script>
Loading…
Cancel
Save