A set of dependency-free basic internationalization tools
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
90 KiB

<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class EUCJP extends AbstractEncoding implements Coder, Decoder {
const NAME = "EUC-JP";
/** Labels under which this encoding is known (the EUC-JP labels of the WHATWG Encoding Standard) */
const LABELS = [
    "cseucpkdfmtjapanese",
    "euc-jp",
    "x-euc-jp",
];
const TABLE_JIS0208 = [12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,12295,12540,8213,8208,65295,65340,65374,8741,65372,8230,8229,8216,8217,8220,8221,65288,65289,12308,12309,65339,65341,65371,65373,12296,12297,12298,12299,12300,12301,12302,12303,12304,12305,65291,65293,177,215,247,65309,8800,65308,65310,8806,8807,8734,8756,9794,9792,176,8242,8243,8451,65509,65284,65504,65505,65285,65283,65286,65290,65312,167,9734,9733,9675,9679,9678,9671,9670,9633,9632,9651,9650,9661,9660,8251,12306,8594,8592,8593,8595,12307,119=>8712,8715,8838,8839,8834,8835,8746,8745,135=>8743,8744,65506,8658,8660,8704,8707,153=>8736,8869,8978,8706,8711,8801,8786,8810,8811,8730,8765,8733,8757,8747,8748,175=>8491,8240,9839,9837,9834,8224,8225,182,187=>9711,203=>65296,65297,65298,65299,65300,65301,65302,65303,65304,65305,220=>65313,65314,65315,65316,65317,65318,65319,65320,65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,65334,65335,65336,65337,65338,252=>65345,65346,65347,65348,65349,65350,65351,65352,65353,65354,65355,65356,65357,65358,65359,65360,65361,65362,65363,65364,65365,65366,65367,65368,65369,65370,282=>12353,12354,12355,12356,12357,12358,12359,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369,12370,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384,12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400,12401,12402,12403,12404,12405,12406,12407,12408,12409,12410,12411,12412,12413,12414,12415,12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431,12432,12433,12434,12435,376=>12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462,12463,12464,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477,12478,12479,12480,12481,12482,12483,12484,12485,12486,12487,12488,12489,12490,12491,12492,1
2493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507,12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523,12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,470=>913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932,933,934,935,936,937,502=>945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,963,964,965,966,967,968,969,564=>1040,1041,1042,1043,1044,1045,1025,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,612=>1072,1073,1074,1075,1076,1077,1105,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,658=>9472,9474,9484,9488,9496,9492,9500,9516,9508,9524,9532,9473,9475,9487,9491,9499,9495,9507,9523,9515,9531,9547,9504,9519,9512,9527,9535,9501,9520,9509,9528,9538,1128=>9312,9313,9314,9315,9316,9317,9318,9319,9320,9321,9322,9323,9324,9325,9326,9327,9328,9329,9330,9331,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,1159=>13129,13076,13090,13133,13080,13095,13059,13110,13137,13143,13069,13094,13091,13099,13130,13115,13212,13213,13214,13198,13199,13252,13217,1190=>13179,12317,12319,8470,13261,8481,12964,12965,12966,12967,12968,12849,12850,12857,13182,13181,13180,8786,8801,8747,8750,8721,8730,8869,8736,8735,8895,8757,8745,8746,1410=>20124,21782,23043,38463,21696,24859,25384,23030,36898,33909,33564,31312,24746,25569,28197,26093,33894,33446,39925,26771,22311,26017,25201,23451,22992,34427,39156,32098,32190,39822,25110,31903,34999,23433,24245,25353,26263,26696,38343,38797,26447,20197,20234,20301,20381,20553,22258,22839,22996,23041,23561,24799,24847,24944,26131,26885,28858,30031,30064,31227,32173,32239,32963,33806,34915,35586,36949,36986,21307,20117,20133,22495,32946,37057,30959,19968,22769,28322,36920,31282,33576,33419,39983,20801,21360,21693,21729,22240,23035,24341,39154,28139,32996,34093,3
8498,38512,38560,38907,21515,21491,23431,28879,32701,36802,38632,21359,40284,31418,19985,30
const TABLE_JIS0212 = [108=>728,711,184,729,733,175,731,730,65374,900,901,127=>161,166,191,168=>186,170,169,174,8482,164,8470,534=>902,904,905,906,938,540=>908,542=>910,939,545=>911,550=>940,941,942,943,970,912,972,962,973,971,944,974,597=>1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1038,1039,645=>1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1118,1119,752=>198,272,755=>294,757=>306,759=>321,319,762=>330,216,338,766=>358,222,784=>230,273,240,295,305,307,312,322,320,329,331,248,339,223,359,254,846=>193,192,196,194,258,461,256,260,197,195,262,264,268,199,266,270,201,200,203,202,282,278,274,280,871=>284,286,290,288,292,205,204,207,206,463,304,298,302,296,308,310,313,317,315,323,327,325,209,211,210,214,212,465,336,332,213,340,344,342,346,348,352,350,356,354,218,217,220,219,364,467,368,362,370,366,360,471,475,473,469,372,221,376,374,377,381,379,940=>225,224,228,226,259,462,257,261,229,227,263,265,269,231,267,271,233,232,235,234,283,279,275,281,501,285,287,968=>289,293,237,236,239,238,464,976=>299,303,297,309,311,314,318,316,324,328,326,241,243,242,246,244,466,337,333,245,341,345,343,347,349,353,351,357,355,250,249,252,251,365,468,369,363,371,367,361,472,476,474,470,373,253,255,375,378,382,380,1410=>19970,19972,19973,19980,19986,19999,20003,20004,20008,20011,20014,20015,20016,20021,20032,20033,20036,20039,20049,20058,20060,20067,20072,20073,20084,20085,20089,20095,20109,20118,20119,20125,20143,20153,20163,20176,20186,20187,20192,20193,20194,20200,20207,20209,20211,20213,20221,20222,20223,20224,20226,20227,20232,20235,20236,20242,20245,20246,20247,20249,20270,20273,20320,20275,20277,20279,20281,20283,20286,20288,20290,20296,20297,20299,20300,20306,20308,20310,20312,20319,20323,20330,20332,20334,20337,20343,20344,20345,20346,20349,20350,20353,20354,20356,20357,20361,20362,20364,20366,20368,20370,20371,20372,20375,20377,20378,20382,20383,20402,20407,20409,20411,20412,20413,20414,20416,20417,20421,20422,20424,20425,20427,20428,20429,20431,20434,20444,20448
,20450,20464,20466,20476,20477,20479,20480,20481,20484,20487,20490,20492,20494,20496,20499,20503,20504,20507,20508,20509,20510,20514,20519,20526,20528,20530,20531,20533,20544,20545,20546,20549,20550,20554,20556,20558,20561,20562,20563,20567,20569,20575,20576,20578,20579,20582,20583,20586,20589,20592,20593,20539,20609,20611,20612,20614,20618,20622,20623,20624,20626,20627,20628,20630,20635,20636,20638,20639,20640,20641,20642,20650,20655,20656,20665,20666,20669,20672,20675,20676,20679,20684,20686,20688,20691,20692,20696,20700,20701,20703,20706,20708,20710,20712,20713,20719,20721,20726,20730,20734,20739,20742,20743,20744,20747,20748,20749,20750,20722,20752,20759,20761,20763,20764,20765,20766,20771,20775,20776,20780,20781,20783,20785,20787,20788,20789,20792,20793,20802,20810,20815,20819,20821,20823,20824,20831,20836,20838,20862,20867,20868,20875,20878,20888,20893,20897,20899,20909,20920,20922,20924,20926,20927,20930,20936,20943,20945,20946,20947,20949,20952,20958,20962,20965,20974,20978,20979,20980,20983,20993,20994,20997,21010,21011,21013,21014,21016,21026,21032,21041,21042,21045,21052,21061,21065,21077,21079,21080,21082,21084,21087,21088,21089,21094,21102,21111,21112,21113,21120,21122,21125,21130,21132,21139,21141,21142,21143,21144,21146,21148,21156,21157,21158,21159,21167,21168,21174,21175,21176,21178,21179,21181,21184,21188,21190,21192,21196,21199,21201,21204,21206,21211,21212,21217,21221,21224,21225,21226,21228,21232,21233,21236,21238,21239,21248,21251,21258,21259,21260,21265,21267,21272,21275,21276,21278,21279,21285,21287,21288,21289,21291,21292,21293,21296,21298,21301,21308,21309,21310,21314,21324,21323,21337,21339,21345,21347,21349,21356,21357,21362,21369,21374,21379,21383,21384,21390,21395,21396,21401,21405,21409,21412,21418,21419,21423,21426,21428,21429,21431,21432,21434,21437,21440,21445,21455,21458,21459,21461,21466,21469,21470,21472,21478,21479,21493,21506,21523,21530,21537,21543,21544,21546,21551,21553,21556,21557,21571,21572,21575,21581,21583,21598,21602,2
1604,21606,21607,21609,21611,21613,21614,21620,21631,21633,21635,21637,21640,21641,21645,21
const TABLE_POINTERS = [8470=>1193,8481=>1195,8544=>1148,1149,1150,1151,1152,1153,1154,1155,1156,1157,8560=>8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8730=>162,8736=>153,8745=>126,125,166,8757=>165,8786=>159,8801=>158,8869=>154,12849=>1201,20008=>8284,20193=>8285,20220=>8286,20224=>8287,20227=>8288,20281=>8289,20310=>8290,20362=>8292,20370=>8291,20372=>8294,20378=>8293,20425=>8277,20429=>8295,20479=>8298,20510=>8299,20514=>8297,20544=>8296,20546=>8302,20550=>8300,20592=>8301,20628=>8303,20696=>8305,20724=>8304,20810=>8306,20836=>8307,20893=>8308,20926=>8309,20972=>8310,21013=>8311,21148=>8312,21158=>8313,21167=>8487,21184=>8314,21211=>8315,21248=>8316,21255=>8317,21284=>8318,21362=>8319,21395=>8320,21426=>8321,21469=>8322,21642=>8325,21660=>8324,21673=>8326,21759=>8327,21894=>8328,22361=>8329,22373=>8330,22444=>8331,22471=>8333,8332,22686=>8336,22706=>8337,22795=>8338,22867=>8339,22875=>8340,22877=>8341,22883=>8342,22948=>8343,22970=>8344,23382=>8345,23488=>8346,23512=>8348,23532=>8349,23582=>8350,23718=>8351,23738=>8352,23797=>8353,23847=>8354,23874=>8357,23891=>8355,23917=>8358,23992=>8359,8360,24016=>8361,24353=>8362,24372=>8363,24389=>8283,24423=>8364,24503=>8365,24542=>8366,24669=>8367,24709=>8368,24714=>8369,24789=>8371,24798=>8370,24818=>8373,24849=>8374,24864=>8372,24880=>8376,24887=>8375,24984=>8377,25107=>8378,25254=>8379,25589=>8380,25696=>8381,25757=>8382,25806=>8383,25934=>8384,26112=>8385,26121=>8388,26133=>8386,26142=>8390,26148=>8391,26158=>8389,26161=>8279,26171=>8387,26199=>8393,26201=>8394,26213=>8392,26227=>8396,26265=>8397,26272=>8398,26290=>8399,26303=>8400,26362=>8401,8282,26382=>8402,26470=>8404,26555=>8405,26560=>8407,26625=>8408,26692=>8409,26706=>8406,26824=>8280,26831=>8410,26984=>8412,27032=>8414,27106=>8415,27184=>8416,27206=>8418,27243=>8417,27251=>8419,27262=>8420,27362=>8421,27364=>8422,27606=>8423,27711=>8424,27740=>8425,27759=>8427,27782=>8426,27866=>8428,27908=>8429,28015=>8431,28039=>8430,28054=>8432,28076=>8433,28111=>84
34,28146=>8436,28152=>8435,28156=>8437,28199=>8440,28217=>8438,28220=>8441,28252=>8439,28351=>8442,28552=>8443,28597=>8444,28661=>8445,28677=>8446,28679=>8447,28712=>8448,28805=>8449,28843=>8450,28859=>8278,28932=>8452,28943=>8451,28998=>8454,8455,29020=>8453,29121=>8457,29182=>8458,29361=>8459,29374=>8460,29476=>8461,29559=>8463,29629=>8464,29641=>8465,29650=>8468,29654=>8466,29667=>8467,29685=>8470,29703=>8469,29734=>8471,29737=>8473,8472,29742=>8474,29794=>8475,29833=>8476,29855=>8477,29953=>8478,29999=>8347,30063=>8479,30338=>8480,30363=>8483,8481,30366=>8482,30374=>8484,30534=>8486,30753=>8488,30798=>8489,30820=>8490,30842=>8491,31024=>8492,31124=>8496,31131=>8498,31441=>8499,31463=>8500,31467=>8502,31646=>8503,32072=>8505,32092=>8506,32160=>8508,32183=>8507,32214=>8509,32338=>8510,32394=>8272,32583=>8511,32673=>8512,33537=>8514,33634=>8515,33663=>8516,33735=>8517,33782=>8518,33864=>8519,33972=>8520,34012=>8276,34131=>8521,34137=>8522,34155=>8523,34224=>8525,34823=>8528,35061=>8529,35100=>8273,35346=>8530,35383=>8531,35449=>8532,35495=>8533,35518=>8534,35551=>8535,35574=>8537,35667=>8538,35711=>8539,36080=>8540,36084=>8541,36114=>8542,36214=>8543,36559=>8545,36967=>8548,37086=>8549,37141=>8551,37159=>8552,37335=>8554,37338=>8553,37342=>8555,37348=>8558,8559,37357=>8556,8557,37382=>8560,37386=>8562,37392=>8561,37433=>8569,8563,37436=>8565,37440=>8564,37454=>8566,37457=>8568,37465=>8567,37479=>8570,37495=>8572,8573,37512=>8275,37543=>8571,37584=>8577,37587=>8581,37589=>8579,37591=>8575,37593=>8576,37600=>8580,37607=>8574,37625=>8281,37627=>8584,37631=>8587,37634=>8589,37661=>8588,8586,37665=>8583,37669=>8582,37704=>8274,37719=>8591,37744=>8590,37796=>8592,37830=>8593,37854=>8594,37880=>8595,37937=>8596,37957=>8597,37960=>8598,38290=>8599,38557=>8602,38575=>8603,38707=>8604,38715=>8605,38723=>8606,38733=>8607,38735=>8608,38737=>8609,38741=>8610,38999=>8611,39013=>8612,39207=>8615,39326=>8617,39502=>8618,39641=>8619,39644=>8620,39794=>8622,39797=>8621,39823=>8623,3
9857=>8624,39867=>8625,39936=>8626,40299=>8628,40304=>8627,40473=>8630,40657=>8631,63785=>8
/** Reverse lookup of TABLE_JIS0208 (code point => pointer)
*
* Null until encode() first meets a code point that is not listed in
* TABLE_POINTERS; at that moment it is filled with array_flip(TABLE_JIS0208)
* and reused by every later call
*/
protected static $pointerCache;
/** Decodes the next character from the string and returns its code point number
*
* Bytes are read from the current byte position. A byte below 0x80 is returned
* unchanged; 0x8E starts a two-byte JIS X 0201 character, 0x8F starts a
* three-byte JIS X 0212 character, and a lead byte from 0xA1 to 0xFE starts a
* two-byte JIS X 0208 character. Any other lead byte, an unmapped pair, or a
* sequence cut short by the end of the string is reported through errDec()
* and whatever errDec() returns is returned to the caller
*
* If the end of the string has been reached, false is returned
*
* @return int|bool
*/
public function nextCode() {
    $this->posChar++;
    $lead = 0x00;
    $jis0212 = false;
    // reading past the end of the string yields "" through the null-coalescing
    // operator, which ends the loop without resorting to error suppression
    while (($b = $this->string[$this->posByte++] ?? "") !== "") {
        $b = ord($b);
        if ($lead === 0) {
            if ($b < 0x80) {
                // ASCII bytes map to themselves
                return $b;
            } elseif ($b < 0x8E || ($b > 0x8F && $b < 0xA1) || $b === 0xFF) {
                // not a valid lead byte
                return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
            }
            // 0x8E, 0x8F, or 0xA1-0xFE: remember the lead byte and fetch the next byte
            $lead = $b;
            continue;
        }
        if ($lead === 0x8E && $b >= 0xA1 && $b <= 0xDF) { // JIS X 0201 character
            return 0xFF61 - 0xA1 + $b;
        } elseif ($lead === 0x8F && $b >= 0xA1 && $b <= 0xFE) { // three-byte JIS X 0212 character
            // the byte just read becomes the lead of the remaining two-byte lookup
            $jis0212 = true;
            $lead = $b;
            continue;
        }
        // only consult a table when both bytes are in range, so that the
        // tables are never indexed with a null pointer
        $code = null;
        if ($lead >= 0xA1 && $lead <= 0xFE && $b >= 0xA1 && $b <= 0xFE) {
            $pointer = ($lead - 0xA1) * 94 + $b - 0xA1;
            $code = $jis0212
                ? (self::TABLE_JIS0212[$pointer] ?? null)
                : (self::TABLE_JIS0208[$pointer] ?? null);
        }
        if (isset($code)) {
            return $code;
        } elseif ($b < 0x80) {
            // an ASCII trail byte is pushed back (posByte is decremented) so that
            // the next call decodes it; the last argument is the offset of the
            // first byte of the malformed sequence
            return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte - (1 + (int) $jis0212));
        } else {
            // the trail byte stays consumed; the last argument is again the
            // offset of the first byte of the malformed sequence
            return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - (2 + (int) $jis0212));
        }
    }
    // undo the increment performed by the read that ran off the end of the string
    $this->posByte--;
    if ($lead === 0) {
        // clean EOF
        $this->posChar--;
        return false;
    } else {
        // dirty EOF: the string ended in the middle of a multi-byte sequence
        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - (1 + (int) $jis0212));
    }
}
/** Produces the EUC-JP byte sequence for a single Unicode code point
*
* Code points below 0 or above 1114111 (0x10FFFF) cause an EncoderException
*
* A code point with no EUC-JP representation is handed to errEnc(); when
* $fatal is true an exception will be thrown, otherwise HTML character
* references will be substituted
*
* @param int $codePoint The Unicode code point to encode
* @param bool $fatal Whether an unencodable code point is a fatal error
*/
public static function encode(int $codePoint, bool $fatal = true): string {
    if ($codePoint < 0 || $codePoint > 0x10FFFF) {
        throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
    }
    if ($codePoint < 128) {
        // ASCII is encoded as itself
        return chr($codePoint);
    }
    if ($codePoint >= 0xFF61 && $codePoint <= 0xFF9F) {
        // this range is encoded as 0x8E followed by a single trail byte
        return chr(0x8E).chr($codePoint - 0xFF61 + 0xA1);
    }
    if ($codePoint === 0xA5) {
        return chr(0x5C);
    }
    if ($codePoint === 0x203E) {
        return chr(0x7E);
    }
    if ($codePoint === 0x2212) {
        // U+2212 is looked up as U+FF0D
        $codePoint = 0xFF0D;
    }
    // TABLE_POINTERS takes precedence; the flipped JIS X 0208 table is only
    // consulted (and built, on first need) when it has no entry
    $index = self::TABLE_POINTERS[$codePoint] ?? null;
    if ($index === null) {
        if (self::$pointerCache === null) {
            self::$pointerCache = array_flip(self::TABLE_JIS0208);
        }
        $index = self::$pointerCache[$codePoint] ?? null;
    }
    if ($index === null) {
        return self::errEnc(!$fatal, $codePoint);
    }
    // a pointer is split into a row (lead byte) and a cell (trail byte) of 94 entries each
    return chr(intdiv($index, 94) + 0xA1).chr(($index % 94) + 0xA1);
}
/** Moves the decoder backwards by up to $distance characters
*
* The byte and character positions are rewound one character at a time until
* either $distance characters have been skipped or the start of the string
* is reached
*
* @param int $distance The number of characters to move back
* @return int The number of characters which could not be skipped
*/
protected function seekBack(int $distance): int {
    while ($distance > 0 && $this->posByte > 0) {
        $distance--;
        $this->posChar--;
        if ($this->errMark === $this->posByte) {
            // the character before this position was malformed: jump to its
            // recorded sync position and restore the previous error record
            $this->posByte = $this->errSync;
            list($this->errMark, $this->errSync) = array_pop($this->errStack);
            continue;
        }
        // step back over one byte
        $last = ord(@$this->string[--$this->posByte]);
        if ($this->posByte === 0 || $last < 0x80) {
            // an ASCII byte always stands alone in EUC-JP, and nothing can
            // precede the first byte of the string: a one-byte character
            continue;
        }
        // step back over a second byte
        $middle = ord(@$this->string[--$this->posByte]);
        if ($middle === 0x8E || $this->posByte === 0 || $this->posByte === $this->errMark) {
            // 0x8E introduces a JIS X 0201 character; failing that, the start of
            // the string or an error mark confirms where the character begins:
            // a two-byte character either way
            continue;
        }
        // step back over a third byte
        $first = ord(@$this->string[--$this->posByte]);
        if ($first !== 0x8F) {
            // no JIS X 0212 introducer: the character was a two-byte JIS X 0208
            // character, so the third byte belongs to whatever precedes it
            $this->posByte++;
        }
        // otherwise the three bytes form a JIS X 0212 character
    }
    return $distance;
}
}