A set of dependency-free basic internationalization tools
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

156 lines
150 KiB

<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class EUCJP extends AbstractEncoding implements StatelessEncoding {
/** The name under which this encoding is identified */
const NAME = "EUC-JP";
/** Lower-case labels which refer to this encoding
*
* NOTE(review): these appear to be the labels the WHATWG Encoding standard assigns to EUC-JP -- confirm against the specification before editing
*/
const LABELS = [
"cseucpkdfmtjapanese",
"euc-jp",
"x-euc-jp"
];
const TABLE_JIS0208_DEC = [12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,12295,12540,8213,8208,65295,65340,65374,8741,65372,8230,8229,8216,8217,8220,8221,65288,65289,12308,12309,65339,65341,65371,65373,12296,12297,12298,12299,12300,12301,12302,12303,12304,12305,65291,65293,177,215,247,65309,8800,65308,65310,8806,8807,8734,8756,9794,9792,176,8242,8243,8451,65509,65284,65504,65505,65285,65283,65286,65290,65312,167,9734,9733,9675,9679,9678,9671,9670,9633,9632,9651,9650,9661,9660,8251,12306,8594,8592,8593,8595,12307,119=>8712,8715,8838,8839,8834,8835,8746,8745,135=>8743,8744,65506,8658,8660,8704,8707,153=>8736,8869,8978,8706,8711,8801,8786,8810,8811,8730,8765,8733,8757,8747,8748,175=>8491,8240,9839,9837,9834,8224,8225,182,187=>9711,203=>65296,65297,65298,65299,65300,65301,65302,65303,65304,65305,220=>65313,65314,65315,65316,65317,65318,65319,65320,65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,65334,65335,65336,65337,65338,252=>65345,65346,65347,65348,65349,65350,65351,65352,65353,65354,65355,65356,65357,65358,65359,65360,65361,65362,65363,65364,65365,65366,65367,65368,65369,65370,282=>12353,12354,12355,12356,12357,12358,12359,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369,12370,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384,12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400,12401,12402,12403,12404,12405,12406,12407,12408,12409,12410,12411,12412,12413,12414,12415,12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431,12432,12433,12434,12435,376=>12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462,12463,12464,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477,12478,12479,12480,12481,12482,12483,12484,12485,12486,12487,12488,12489,12490,12491,124
92,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507,12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523,12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,470=>913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932,933,934,935,936,937,502=>945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,963,964,965,966,967,968,969,564=>1040,1041,1042,1043,1044,1045,1025,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,612=>1072,1073,1074,1075,1076,1077,1105,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,658=>9472,9474,9484,9488,9496,9492,9500,9516,9508,9524,9532,9473,9475,9487,9491,9499,9495,9507,9523,9515,9531,9547,9504,9519,9512,9527,9535,9501,9520,9509,9528,9538,1128=>9312,9313,9314,9315,9316,9317,9318,9319,9320,9321,9322,9323,9324,9325,9326,9327,9328,9329,9330,9331,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,1159=>13129,13076,13090,13133,13080,13095,13059,13110,13137,13143,13069,13094,13091,13099,13130,13115,13212,13213,13214,13198,13199,13252,13217,1190=>13179,12317,12319,8470,13261,8481,12964,12965,12966,12967,12968,12849,12850,12857,13182,13181,13180,8786,8801,8747,8750,8721,8730,8869,8736,8735,8895,8757,8745,8746,1410=>20124,21782,23043,38463,21696,24859,25384,23030,36898,33909,33564,31312,24746,25569,28197,26093,33894,33446,39925,26771,22311,26017,25201,23451,22992,34427,39156,32098,32190,39822,25110,31903,34999,23433,24245,25353,26263,26696,38343,38797,26447,20197,20234,20301,20381,20553,22258,22839,22996,23041,23561,24799,24847,24944,26131,26885,28858,30031,30064,31227,32173,32239,32963,33806,34915,35586,36949,36986,21307,20117,20133,22495,32946,37057,30959,19968,22769,28322,36920,31282,33576,33419,39983,20801,21360,21693,21729,22240,23035,24341,39154,28139,32996,340
93,38498,38512,38560,38907,21515,21491,23431,28879,32701,36802,38632,21359,40284,31418,1998
const TABLE_JIS0208_ENC = [167=>87,14,176=>74,61,180=>12,182=>182,215=>62,247=>63,913=>470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,931=>487,488,489,490,491,492,493,945=>502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,963=>519,520,521,522,523,524,525,1025=>570,1040=>564,565,566,567,568,569,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,612,613,614,615,616,617,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,1105=>618,8208=>29,8213=>28,8216=>37,38,8220=>39,40,8224=>180,181,8229=>36,35,8240=>176,8242=>75,76,8251=>101,8451=>77,8470=>1193,8481=>1195,8491=>175,8544=>1148,1149,1150,1151,1152,1153,1154,1155,1156,1157,8560=>8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8592=>104,105,103,106,8658=>138,8660=>139,8704=>140,8706=>156,141,8711=>157,119,8715=>120,8721=>1211,8730=>162,8733=>164,70,1215,153,8741=>33,8743=>135,136,126,125,166,167,8750=>1210,8756=>71,165,8765=>163,8786=>159,8800=>65,158,8806=>68,69,8810=>160,161,8834=>123,124,8838=>121,122,8869=>154,8895=>1216,8978=>155,9312=>1128,1129,1130,1131,1132,1133,1134,1135,1136,1137,1138,1139,1140,1141,1142,1143,1144,1145,1146,1147,9472=>658,669,659,670,9484=>660,9487=>671,661,9491=>672,663,9495=>674,662,9499=>673,664,685,9504=>680,9507=>675,666,687,9512=>682,9515=>677,665,9519=>681,686,9523=>676,667,9527=>683,688,9531=>678,668,9535=>684,9538=>689,9547=>679,9632=>96,95,9650=>98,97,9660=>100,99,9670=>94,93,9675=>90,9678=>92,91,9711=>187,9733=>89,88,9792=>73,9794=>72,9834=>179,9837=>178,9839=>177,12288=>0,1,2,22,12293=>24,25,26,49,50,51,52,53,54,55,56,57,58,102,107,43,44,12317=>1191,12319=>1192,12353=>282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,35
0,351,352,353,354,355,356,357,358,359,360,361,362,363,364,12443=>10,11,20,21,12449=>376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,12539=>5,27,18,19,12849=>1201,1202,12857=>1203,12964=>1196,1197,1198,1199,1200,13059=>1165,13069=>1169,13076=>1160,13080=>1163,13090=>1161,1171,13094=>1170,1164,13099=>1172,13110=>1166,13115=>1174,13129=>1159,1173,13133=>1162,13137=>1167,13143=>1168,13179=>1190,1206,1205,1204,13198=>1178,1179,13212=>1175,1176,1177,13217=>1181,13252=>1180,13261=>1194,19968=>1485,3285,19971=>2560,19975=>4039,2795,2459,2794,1625,19981=>3811,4166,19984=>4419,1518,19988=>1769,4420,2917,4639,1933,3879,19998=>2796,20001=>4259,20006=>3887,20008=>8284,20010=>4421,20013=>3265,20017=>4422,2063,20022=>4423,20024=>1846,3211,20027=>2608,4424,20031=>4425,20034=>4426,3592,20037=>1934,20043=>3594,20045=>3544,2196,3966,20053=>6808,4427,2797,4428,1618,20061=>2042,2241,4114,20066=>4987,20081=>4215,20083=>3570,20094=>1790,20096=>1900,20098=>4429,20101=>4430,4256,20104=>4164,3079,4432,2529,3560,20110=>4435,20113=>1533,2224,20116=>2223,1479,20120=>4366,4365,20123=>2368,1410,20126=>4436,4437,4438,3967,4439,20132=>2243,1480,4029,20136=>1971,20139=>1972,1973,3355,4257,20144=>4440,20147=>4441,20150=>4442,20154=>2864,20160=>2657,2865,4447,20164=>4445,20166=>4446,1935,20170=>2352,1677,20173=>4444,4443,3862,20180=>2481,2480,3131,4448,3812,2984,20189=>23,4449,4451,20193=>8285,20195=>3170,4294,1451,20205=>4450,1627,20208=>2009,20210=>3266,20214=>2146,4452,20219=>3575,8286,20224=>8287,1860,20227=>8288,20233=>4453,1452,20237=>2225,1861,3849,3683,1936,20250=>1678,20252=>4488,3416,20271=>3647,4455,20276=>3693,20278=>4295,20280=>2832,8289,2482,20284=>2530,1629,20291=>3338,20294=>3197,4459,20301=>145
3,3356,2658,2369,4139,20307=>3147,20309=>1628,8290,4458,20313=>4165,4454,4456,2427,4457,489
const TABLE_JIS0212 = [108=>728,711,184,729,733,175,731,730,65374,900,901,127=>161,166,191,168=>186,170,169,174,8482,164,8470,534=>902,904,905,906,938,540=>908,542=>910,939,545=>911,550=>940,941,942,943,970,912,972,962,973,971,944,974,597=>1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1038,1039,645=>1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1118,1119,752=>198,272,755=>294,757=>306,759=>321,319,762=>330,216,338,766=>358,222,784=>230,273,240,295,305,307,312,322,320,329,331,248,339,223,359,254,846=>193,192,196,194,258,461,256,260,197,195,262,264,268,199,266,270,201,200,203,202,282,278,274,280,871=>284,286,290,288,292,205,204,207,206,463,304,298,302,296,308,310,313,317,315,323,327,325,209,211,210,214,212,465,336,332,213,340,344,342,346,348,352,350,356,354,218,217,220,219,364,467,368,362,370,366,360,471,475,473,469,372,221,376,374,377,381,379,940=>225,224,228,226,259,462,257,261,229,227,263,265,269,231,267,271,233,232,235,234,283,279,275,281,501,285,287,968=>289,293,237,236,239,238,464,976=>299,303,297,309,311,314,318,316,324,328,326,241,243,242,246,244,466,337,333,245,341,345,343,347,349,353,351,357,355,250,249,252,251,365,468,369,363,371,367,361,472,476,474,470,373,253,255,375,378,382,380,1410=>19970,19972,19973,19980,19986,19999,20003,20004,20008,20011,20014,20015,20016,20021,20032,20033,20036,20039,20049,20058,20060,20067,20072,20073,20084,20085,20089,20095,20109,20118,20119,20125,20143,20153,20163,20176,20186,20187,20192,20193,20194,20200,20207,20209,20211,20213,20221,20222,20223,20224,20226,20227,20232,20235,20236,20242,20245,20246,20247,20249,20270,20273,20320,20275,20277,20279,20281,20283,20286,20288,20290,20296,20297,20299,20300,20306,20308,20310,20312,20319,20323,20330,20332,20334,20337,20343,20344,20345,20346,20349,20350,20353,20354,20356,20357,20361,20362,20364,20366,20368,20370,20371,20372,20375,20377,20378,20382,20383,20402,20407,20409,20411,20412,20413,20414,20416,20417,20421,20422,20424,20425,20427,20428,20429,20431,20434,20444,20448
,20450,20464,20466,20476,20477,20479,20480,20481,20484,20487,20490,20492,20494,20496,20499,20503,20504,20507,20508,20509,20510,20514,20519,20526,20528,20530,20531,20533,20544,20545,20546,20549,20550,20554,20556,20558,20561,20562,20563,20567,20569,20575,20576,20578,20579,20582,20583,20586,20589,20592,20593,20539,20609,20611,20612,20614,20618,20622,20623,20624,20626,20627,20628,20630,20635,20636,20638,20639,20640,20641,20642,20650,20655,20656,20665,20666,20669,20672,20675,20676,20679,20684,20686,20688,20691,20692,20696,20700,20701,20703,20706,20708,20710,20712,20713,20719,20721,20726,20730,20734,20739,20742,20743,20744,20747,20748,20749,20750,20722,20752,20759,20761,20763,20764,20765,20766,20771,20775,20776,20780,20781,20783,20785,20787,20788,20789,20792,20793,20802,20810,20815,20819,20821,20823,20824,20831,20836,20838,20862,20867,20868,20875,20878,20888,20893,20897,20899,20909,20920,20922,20924,20926,20927,20930,20936,20943,20945,20946,20947,20949,20952,20958,20962,20965,20974,20978,20979,20980,20983,20993,20994,20997,21010,21011,21013,21014,21016,21026,21032,21041,21042,21045,21052,21061,21065,21077,21079,21080,21082,21084,21087,21088,21089,21094,21102,21111,21112,21113,21120,21122,21125,21130,21132,21139,21141,21142,21143,21144,21146,21148,21156,21157,21158,21159,21167,21168,21174,21175,21176,21178,21179,21181,21184,21188,21190,21192,21196,21199,21201,21204,21206,21211,21212,21217,21221,21224,21225,21226,21228,21232,21233,21236,21238,21239,21248,21251,21258,21259,21260,21265,21267,21272,21275,21276,21278,21279,21285,21287,21288,21289,21291,21292,21293,21296,21298,21301,21308,21309,21310,21314,21324,21323,21337,21339,21345,21347,21349,21356,21357,21362,21369,21374,21379,21383,21384,21390,21395,21396,21401,21405,21409,21412,21418,21419,21423,21426,21428,21429,21431,21432,21434,21437,21440,21445,21455,21458,21459,21461,21466,21469,21470,21472,21478,21479,21493,21506,21523,21530,21537,21543,21544,21546,21551,21553,21556,21557,21571,21572,21575,21581,21583,21598,21602,2
1604,21606,21607,21609,21611,21613,21614,21620,21631,21633,21635,21637,21640,21641,21645,21
/** NOTE(review): no method visible in this class reads or writes this property; presumably it is consumed by the parent class or a shared trait -- confirm before changing or removing it */
protected $dirtyEOF = 0;
/** Decodes the next character from the string and returns its code point number
 *
 * Bytes are consumed one at a time from the current byte position:
 *
 * - bytes below 0x80 are returned unchanged as ASCII code points
 * - 0x8E followed by a byte from 0xA1 to 0xDF is a two-byte JIS X 0201 character
 * - 0x8F followed by two bytes from 0xA1 to 0xFE is a three-byte JIS X 0212 character
 * - any other pair of bytes from 0xA1 to 0xFE is a two-byte JIS X 0208 character
 *
 * Invalid lead bytes, unmapped or malformed sequences, and sequences cut short
 * by the end of the string are handed to errDec() along with the character
 * offset and the byte offset of the first byte of the sequence; whatever
 * errDec() returns is returned to the caller
 *
 * If the end of the string has been reached, false is returned
 *
 * @return int|bool
 */
public function nextCode() {
    $this->posChar++;
    $lead = 0x00;
    $jis0212 = false;
    // reading past the end of the string evaluates to an empty string (the diagnostic is suppressed), which terminates the loop
    while (($b = @$this->string[$this->posByte++]) !== "") {
        $b = ord($b);
        if ($lead === 0) {
            if ($b < 0x80) {
                // ASCII byte
                return $b;
            } elseif ($b < 0x8E || ($b > 0x8F && $b < 0xA1) || $b === 0xFF) {
                // the byte can neither stand alone nor start a multi-byte sequence
                return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
            }
            // 0x8E, 0x8F, or 0xA1 to 0xFE: remember the lead byte and read on
            $lead = $b;
            continue;
        }
        if ($lead === 0x8E && $b >= 0xA1 && $b <= 0xDF) { // JIS X 0201 character
            return 0xFF61 - 0xA1 + $b;
        } elseif ($lead === 0x8F && $b >= 0xA1 && $b <= 0xFE) { // three-byte JIS X 0212 character
            // the byte just read becomes the lead byte of the remaining two-byte pair
            $jis0212 = true;
            $lead = $b;
            continue;
        }
        // only consult a table when both bytes are in range; anything else is unmapped by definition
        $code = null;
        if ($lead >= 0xA1 && $lead <= 0xFE && $b >= 0xA1 && $b <= 0xFE) {
            $pointer = ($lead - 0xA1) * 94 + $b - 0xA1;
            $code = $jis0212
                ? (self::TABLE_JIS0212[$pointer] ?? null)
                : (self::TABLE_JIS0208_DEC[$pointer] ?? null);
        }
        if ($code !== null) {
            return $code;
        }
        if ($b < 0x80) {
            // an ASCII trail byte is not part of the bad sequence: step back so that it is decoded on its own by the next call
            $this->posByte--;
            $seqLength = 1 + (int) $jis0212;
        } else {
            $seqLength = 2 + (int) $jis0212;
        }
        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $seqLength);
    }
    // undo the increment performed by the failed read past the end of the string
    $this->posByte--;
    if ($lead === 0) {
        // clean EOF: no character was consumed, so the character position is restored as well
        $this->posChar--;
        return false;
    }
    // dirty EOF: the string ended in the middle of a multi-byte sequence
    return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - (1 + (int) $jis0212));
}
/** Encodes a single Unicode code point as an EUC-JP byte string
 *
 * Code points below zero or above 0x10FFFF cause an EncoderException to be thrown
 *
 * Code points which have no EUC-JP representation are passed to errEnc()
 * together with the negation of $fatal; as documented for this method, a
 * true $fatal results in an exception, while a false $fatal results in an
 * HTML character reference being substituted
 *
 * @param int $codePoint The Unicode code point to encode
 * @param bool $fatal Whether an unencodable code point is treated as a fatal error
 */
public static function encode(int $codePoint, bool $fatal = true): string {
    if ($codePoint < 0 || $codePoint > 0x10FFFF) {
        throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
    }
    if ($codePoint < 128) {
        // ASCII is encoded as itself
        return chr($codePoint);
    }
    if ($codePoint >= 0xFF61 && $codePoint <= 0xFF9F) {
        // this range is written as 0x8E followed by a single trail byte
        return chr(0x8E).chr($codePoint - 0xFF61 + 0xA1);
    }
    if ($codePoint === 0xA5) {
        return chr(0x5C);
    }
    if ($codePoint === 0x203E) {
        return chr(0x7E);
    }
    if ($codePoint === 0x2212) {
        // U+2212 is looked up (and, on failure, reported) as U+FF0D
        $codePoint = 0xFF0D;
    }
    $index = self::TABLE_JIS0208_ENC[$codePoint] ?? null;
    if ($index === null) {
        return self::errEnc(!$fatal, $codePoint);
    }
    // the table index addresses a 94-column grid whose rows and columns both start at byte 0xA1
    $row = (int) ($index / 94);
    $column = $index % 94;
    return chr($row + 0xA1).chr($column + 0xA1);
}
/** Moves the byte and character positions backwards by up to $distance characters
 *
 * Character boundaries are found by inspecting up to three bytes before the
 * current position; positions recorded for previously reported decoding
 * errors (errMark, errSync and errStack) are used to step over malformed
 * input in a single move
 *
 * @param int $distance The number of characters to move back
 * @return int The number of characters which could not be traversed because the start of the string was reached
 */
protected function seekBack(int $distance): int {
    while ($distance > 0 && $this->posByte > 0) {
        $distance--;
        $this->posChar--;
        if ($this->errMark === $this->posByte) {
            // the character just before this position was malformed: jump to its recorded start and restore the previous error record
            $this->posByte = $this->errSync;
            list($this->errMark, $this->errSync) = array_pop($this->errStack);
            continue;
        }
        // step over the final byte of the previous character
        $last = ord(@$this->string[--$this->posByte]);
        if ($this->posByte === 0 || $last < 0x80) {
            // an ASCII byte always stands alone in EUC-JP, and nothing can precede the first byte of the string
            continue;
        }
        // step over a second byte
        $middle = ord(@$this->string[--$this->posByte]);
        if ($middle === 0x8E || $this->posByte === 0 || $this->posByte === $this->errMark) {
            // a 0x8E lead (JIS X 0201), the start of the string, or an error mark each confirm a two-byte character
            continue;
        }
        // step over a third byte: 0x8F marks a three-byte JIS X 0212 character
        if (ord(@$this->string[--$this->posByte]) !== 0x8F) {
            // otherwise the character was a two-byte JIS X 0208 character, so the third byte belongs to its predecessor
            $this->posByte++;
        }
    }
    return $distance;
}
}