A set of dependency-free basic internationalization tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

137 lines
106 KiB

<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class EUCKR extends AbstractEncoding implements Coder, Decoder {
/** The name of this encoding */
const NAME = "EUC-KR";
/** Lower-case labels (aliases) for this encoding; these are the labels the WHATWG Encoding standard lists for EUC-KR */
const LABELS = [
    "cseuckr",
    "csksc56011987",
    "euc-kr",
    "iso-ir-149",
    "korean",
    "ks_c_5601-1987",
    "ks_c_5601-1989",
    "ksc5601",
    "ksc_5601",
    "windows-949",
];
const TABLE_CODES = [44034,44035,44037,44038,44043,44044,44045,44046,44047,44056,44062,44063,44065,44066,44067,44069,44070,44071,44072,44073,44074,44075,44078,44082,44083,44084,32=>44085,44086,44087,44090,44091,44093,44094,44095,44097,44098,44099,44100,44101,44102,44103,44104,44105,44106,44108,44110,44111,44112,44113,44114,44115,44117,64=>44118,44119,44121,44122,44123,44125,44126,44127,44128,44129,44130,44131,44132,44133,44134,44135,44136,44137,44138,44139,44140,44141,44142,44143,44146,44147,44149,44150,44153,44155,44156,44157,44158,44159,44162,44167,44168,44173,44174,44175,44177,44178,44179,44181,44182,44183,44184,44185,44186,44187,44190,44194,44195,44196,44197,44198,44199,44203,44205,44206,44209,44210,44211,44212,44213,44214,44215,44218,44222,44223,44224,44226,44227,44229,44230,44231,44233,44234,44235,44237,44238,44239,44240,44241,44242,44243,44244,44246,44248,44249,44250,44251,44252,44253,44254,44255,44258,44259,44261,44262,44265,44267,44269,44270,44274,44276,44279,44280,44281,44282,44283,44286,44287,44289,44290,44291,44293,44295,44296,44297,44298,44299,44302,44304,44306,44307,44308,44309,44310,44311,44313,44314,44315,44317,44318,44319,44321,44322,44323,44324,44325,44326,44327,44328,44330,44331,44334,44335,44336,44337,44338,44339,222=>44342,44343,44345,44346,44347,44349,44350,44351,44352,44353,44354,44355,44358,44360,44362,44363,44364,44365,44366,44367,44369,44370,44371,44373,44374,44375,254=>44377,44378,44379,44380,44381,44382,44383,44384,44386,44388,44389,44390,44391,44392,44393,44394,44395,44398,44399,44401,44402,44407,44408,44409,44410,44414,44416,44419,44420,44421,44422,44423,44426,44427,44429,44430,44431,44433,44434,44435,44436,44437,44438,44439,44440,44441,44442,44443,44446,44447,44448,44449,44450,44451,44453,44454,44455,44456,44457,44458,44459,44460,44461,44462,44463,44464,44465,44466,44467,44468,44469,44470,44472,44473,44474,44475,44476,44477,44478,44479,44482,44483,44485,44486,44487,44489,44490,44491,44492,44493,44494,44495,44498,44500,44501,44502,44503
,44504,44505,44506,44507,44509,44510,44511,44513,44514,44515,44517,44518,44519,44520,44521,44522,44523,44524,44525,44526,44527,44528,44529,44530,44531,44532,44533,44534,44535,44538,44539,44541,44542,44546,44547,44548,44549,44550,44551,44554,44556,44558,44559,44560,44561,44562,44563,44565,44566,44567,44568,44569,44570,44571,44572,412=>44573,44574,44575,44576,44577,44578,44579,44580,44581,44582,44583,44584,44585,44586,44587,44588,44589,44590,44591,44594,44595,44597,44598,44601,44603,44604,444=>44605,44606,44607,44610,44612,44615,44616,44617,44619,44623,44625,44626,44627,44629,44631,44632,44633,44634,44635,44638,44642,44643,44644,44646,44647,44650,44651,44653,44654,44655,44657,44658,44659,44660,44661,44662,44663,44666,44670,44671,44672,44673,44674,44675,44678,44679,44680,44681,44682,44683,44685,44686,44687,44688,44689,44690,44691,44692,44693,44694,44695,44696,44697,44698,44699,44700,44701,44702,44703,44704,44705,44706,44707,44708,44709,44710,44711,44712,44713,44714,44715,44716,44717,44718,44719,44720,44721,44722,44723,44724,44725,44726,44727,44728,44729,44730,44731,44735,44737,44738,44739,44741,44742,44743,44744,44745,44746,44747,44750,44754,44755,44756,44757,44758,44759,44762,44763,44765,44766,44767,44768,44769,44770,44771,44772,44773,44774,44775,44777,44778,44780,44782,44783,44784,44785,44786,44787,44789,44790,44791,44793,44794,44795,44797,44798,44799,44800,44801,44802,44803,44804,44805,602=>44806,44809,44810,44811,44812,44814,44815,44817,44818,44819,44820,44821,44822,44823,44824,44825,44826,44827,44828,44829,44830,44831,44832,44833,44834,44835,634=>44836,44837,44838,44839,44840,44841,44842,44843,44846,44847,44849,44851,44853,44854,44855,44856,44857,44858,44859,44862,44864,44868,44869,44870,44871,44874,44875,44876,44877,44878,44879,44881,44882,44883,44884,44885,44886,44887,44888,44889,44890,44891,44894,44895,44896,44897,44898,44899,44902,44903,44904,44905,44906,44907,44908,44909,44910,44911,44912,44913,44914,44915,44916,44917,44918,44919,44920,44922,44923,44924,44925
,44926,44927,44929,44930,44931,44933,44934,44935,44937,44938,44939,44940,44941,44942,44943,
/** Reverse lookup of TABLE_CODES (code point => pointer); left unset until encode() first needs it, at which point it is built with array_flip() */
protected static $pointerCache;
/**
 * Decodes the next character of the string and advances the byte and character pointers
 *
 * Bytes below 0x80 are returned unchanged. Any other character is a two-byte sequence:
 * a lead byte in the range 0x81-0xFE followed by a trail byte in the range 0x41-0xFE,
 * which together form the pointer (lead - 0x81) * 190 + (trail - 0x41) into TABLE_CODES
 *
 * @return int|false The decoded code point, or false when the end of the string has been reached. For malformed input the result of errDec() is returned instead (errDec() is defined outside this block; presumably it either throws or yields a replacement, depending on the error mode)
 */
public function nextCode() {
    $this->posChar++;
    $lead = 0x00;
    // reading past the end of the string yields an empty string, which ends the loop
    while (($b = $this->string[$this->posByte++] ?? "") !== "") {
        $b = ord($b);
        if ($lead === 0) {
            if ($b < 0x80) {
                // ASCII byte: the byte value is the code point
                return $b;
            }
            if ($b === 0x80 || $b === 0xFF) {
                // these two bytes can never start a sequence
                return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
            }
            // anything else is a lead byte; go around again for its trail byte
            $lead = $b;
            continue;
        }
        // we have a lead byte, so this byte is the trail byte; only look it up if it is in the valid trail range
        $code = null;
        if ($b >= 0x41 && $b <= 0xFE) {
            $code = self::TABLE_CODES[($lead - 0x81) * 190 + ($b - 0x41)] ?? null;
        }
        if ($code !== null) {
            return $code;
        }
        if ($b < 0x80) {
            // the offending trail byte is ASCII: step back over it so that the next call reads it again as a character of its own
            return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte - 1);
        }
        // both bytes are consumed by the error
        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
    }
    // the failed read past the end still incremented the byte pointer; undo that
    $this->posByte--;
    if ($lead === 0) {
        // clean EOF: no character was read, so undo the character increment as well
        $this->posChar--;
        return false;
    }
    // dirty EOF: the string ended after a lead byte; record how many bytes the truncated sequence occupies
    $this->dirtyEOF = 1;
    return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
}
/**
 * Returns the EUC-KR byte sequence for a single Unicode code point
 *
 * Code points below 128 encode as a single identical byte; anything else is looked up in
 * a reverse index of TABLE_CODES and, when found, encoded as a lead byte and a trail byte
 *
 * @param int $codePoint The Unicode code point to encode
 * @param bool $fatal Passed to errEnc() in negated form when the code point has no EUC-KR representation (errEnc() is defined outside this block; presumably it throws when $fatal is true)
 * @return string The encoded bytes, or whatever errEnc() returns for an unrepresentable code point
 * @throws EncoderException If $codePoint is negative or greater than 0x10FFFF
 */
public static function encode(int $codePoint, bool $fatal = true): string {
    if ($codePoint < 0 || $codePoint > 0x10FFFF) {
        throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
    }
    if ($codePoint < 128) {
        return chr($codePoint);
    }
    // build the code point => pointer index the first time it is needed
    if (!isset(self::$pointerCache)) {
        // this is safe: the EUC-KR index has no duplicates
        self::$pointerCache = array_flip(self::TABLE_CODES);
    }
    $index = self::$pointerCache[$codePoint] ?? null;
    if ($index === null) {
        return self::errEnc(!$fatal, $codePoint);
    }
    // each lead byte covers 190 consecutive pointers; leads start at 0x81 and trails at 0x41
    $leadByte = intdiv($index, 190) + 0x81;
    $trailByte = ($index % 190) + 0x41;
    return chr($leadByte).chr($trailByte);
}
/** Implements backward seeking $distance characters
 *
 * The byte pointer is moved back one character at a time until $distance characters have been passed or the start of the string is reached; the character pointer is decremented in step
 *
 * Trail bytes may fall in the upper ASCII range (0x41 and above) as well as the non-ASCII range, so the start of the previous character cannot always be told from the byte before the pointer alone. Up to two preceding bytes are examined and, where that is still ambiguous, the length of the preceding run of non-ASCII bytes is counted to decide how the bytes pair up
 *
 * @param int $distance The number of characters to seek backward
 * @return int The number of characters which were not passed because the start of the string was reached first
 */
protected function seekBack(int $distance): int {
while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;
if ($this->posByte === $this->errMark) { // the previous character was malformed
// move to the correct sync position, pop the error stack, and continue
$this->posByte = $this->errSync;
list($this->errMark, $this->errSync) = array_pop($this->errStack);
continue;
}
// go back one byte
$b1 = ord(@$this->string[--$this->posByte]);
if ($b1 < 0x41 || $this->posByte === $this->errMark || $this->posByte == 0) { // these bytes never appear in sequences, a byte coming after an error is necessarily its own character, and the first byte is necessarily the start of a sequence
// the byte is a character
continue;
}
// go back a second byte
$b2 = ord(@$this->string[--$this->posByte]);
if ($b2 < 0x80) { // these bytes never appear in the lead of a sequence
// the first byte was a character; step forward again so the pointer rests on it
$this->posByte += 1;
continue;
} elseif ($b1 > 0x7F && $b2 > 0x7F) { // two non-ASCII bytes in a row with no error necessarily form a sequence
// the character starts at the second byte, which is where the pointer already rests
continue;
} else { // the second byte is part of a two-byte sequence, but it's unclear if it's the lead or trail byte
// $start is the offset just past the first byte; $pos will become the offset where the run of non-ASCII bytes containing the second byte begins
$start = $this->posByte + 2;
$pos = $this->posByte;
// go back bytes until an error mark, an ASCII byte, or start of string
while ($pos > 0 && $pos > $this->errMark) {
$b = ord(@$this->string[--$pos]);
if ($b < 0x80) {
// the ASCII byte is not part of the run; step forward past it
$pos++;
break;
}
}
if (($start - $pos) % 2) { // the number of bytes is odd
// the non-ASCII bytes pair up among themselves, so the first byte was a character
$this->posByte += 1;
continue;
} else { // the number of bytes is even
// the second byte is the lead of a sequence ending in the first byte, so the character starts at the second byte
continue;
}
}
}
return $distance;
}
}