A set of dependency-free basic internationalization tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

160 lines
226 KiB

<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class Big5 implements StatelessEncoding {
use GenericEncoding;
// The name of this encoding, followed by a list of label strings for it.
// NOTE(review): LABELS presumably holds the aliases by which the encoding can be looked up; confirm against the code which consumes these constants
const NAME = "Big5";
const LABELS = ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"];
const TABLE_CODES = [942=>17392,19506,17923,17830,17784,160359,19831,17843,162993,19682,163013,15253,18230,18244,19527,19520,148159,144919,160594,159371,159954,19543,172881,18255,17882,19589,162924,19719,19108,18081,158499,29221,154196,137827,146950,147297,26189,22267,981=>32149,22813,166841,15860,38708,162799,23515,138590,23204,13861,171696,23249,23479,23804,26478,34195,170309,29793,29853,14453,138579,145054,155681,16108,153822,15093,31484,40855,147809,166157,143850,133770,143966,17162,33924,40854,37935,18736,34323,22678,38730,37400,31184,31282,26208,27177,34973,29772,31685,26498,31276,21071,36934,13542,29636,155065,29894,40903,22451,18735,21580,16689,145038,22552,31346,162661,35727,18094,159368,16769,155033,31662,140476,40904,140481,140489,140492,40905,34052,144827,16564,40906,17633,175615,25281,28782,40907,1099=>12736,12737,12738,12739,12740,131340,12741,131281,131277,12742,12743,131275,139240,12744,131274,12745,12746,12747,12748,131342,12749,12750,256,193,461,192,274,201,282,200,332,211,465,210,1134=>7870,1136=>7872,202,257,225,462,224,593,275,233,283,232,299,237,464,236,333,243,466,242,363,250,468,249,470,472,474,476,252,1165=>7871,1167=>7873,234,609,9178,9179,1256=>172969,135493,1259=>25866,1262=>20029,28381,40270,37343,1268=>161589,25745,20250,20264,20392,20822,20852,20892,20964,21153,21160,21307,21326,21457,21464,22242,22768,22788,22791,22834,22836,23398,23454,23455,23706,24198,24635,25993,26622,26628,26725,27982,28860,30005,32420,32428,32442,32455,32463,32479,32518,32567,33402,33487,33647,35270,35774,35810,36710,36711,36718,29713,31996,32205,26950,31433,21031,1329=>37260,30904,37214,32956,1334=>36107,33014,133607,1339=>32927,40647,19661,40393,40460,19518,171510,159758,40458,172339,13761,1351=>28314,33342,29977,1355=>18705,39532,39567,40857,31111,164972,138698,132560,142054,20004,20097,20096,20103,20159,20203,20279,13388,20413,15944,20483,20616,13437,13459,13477,20870,22789,20955,20988,20997,20105,21113,21136,21287,13767,21417,13649,21424,13651,21442,21539,1
3677,13682,13953,21651,21667,21684,21689,21712,21743,21784,21795,21800,13720,21823,13733,13759,21975,13765,163204,21797,1416=>134210,134421,151851,21904,142534,14828,131905,36422,150968,169189,16467,164030,30586,142392,14900,18389,164189,158194,151018,25821,134524,135092,134357,135412,25741,36478,134806,134155,135012,142505,164438,148691,1449=>134470,170573,164073,18420,151207,142530,39602,14951,169460,16365,13574,152263,169940,161992,142660,40302,38933,1467=>17369,155813,25780,21731,142668,142282,135287,14843,135279,157402,157462,162208,25834,151634,134211,36456,139681,166732,132913,1487=>18443,131497,16378,22643,142733,1493=>148936,132348,155799,134988,134550,21881,16571,17338,1502=>19124,141926,135325,33194,39157,134556,25465,14846,141173,36288,22177,25724,15939,1516=>173569,134665,142031,142537,1521=>135368,145858,14738,14854,164507,13688,155209,139463,22098,134961,142514,169760,13500,27709,151099,1538=>161140,142987,139784,173659,167117,134778,134196,157724,32659,135375,141315,141625,13819,152035,134796,135053,134826,16275,134960,134471,135503,134732,1561=>134827,134057,134472,135360,135485,16377,140950,25650,135085,144372,161337,142286,134526,134527,142417,142421,14872,134808,135367,134958,173618,158544,167122,167321,167114,38314,21708,33476,21945,1591=>171715,39974,39606,161630,142830,28992,33133,33004,23580,157042,33076,14231,21343,164029,37302,134906,134671,134775,134907,13789,151019,13833,134358,22191,141237,135369,134672,134776,135288,135496,164359,136277,134777,151120,142756,23124,135197,135198,135413,135414,22428,134673,161428,164557,135093,134779,151934,14083,135094,135552,152280,172733,149978,137274,147831,164476,22681,21096,13850,153405,31666,23400,18432,19244,40743,18919,39967,39821,154484,143677,22011,13810,22153,20008,22786,138177,194680,38737,131206,20059,20155,13630,23587,24401,24516,14586,25164,25909,27514,27701,27706,28780,29227,20012,29357,149737,32594,31035,31993,32595,156266,13505,1694=>156491,32770,32896,157202,158033,21341,34916,35265,161
970,35744,36125,38021,38264,38271,38376,167439,38886,39029,39118,39134,39267,170000,40060,4
/** Pointers which decode to a pair of code points rather than to a single one
*
* Each key is a pointer as computed by nextCode() from a lead and trail byte; each value lists the two code points in output order.
* nextCode() returns the first member and buffers the second for the following call */
const TABLE_DOUBLES = [
1133 => [0x00CA, 0x0304],
1135 => [0x00CA, 0x030C],
1164 => [0x00EA, 0x0304],
1166 => [0x00EA, 0x030C],
];
const TABLE_ENC = [167=>5103,10951,175=>5120,5205,5137,183=>5040,215=>5135,247=>5136,11300,331=>11301,339=>11299,592=>11295,596=>11297,603=>11296,618=>11303,629=>11298,643=>11294,650=>11302,710=>10952,5430,713=>5428,5429,5431,717=>5123,729=>5427,913=>5342,5343,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,931=>5359,5360,5361,5362,5363,5364,5365,945=>5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,963=>5383,5384,5385,5386,5387,5388,5389,1025=>11141,1040=>11135,11136,11137,11138,11139,11140,11142,11143,11144,11145,11146,11147,11148,11149,11150,11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,11162,11163,11164,11165,11166,11167,11168,11169,11170,11171,11172,11173,11175,11176,11177,11178,11179,11180,11181,11182,11183,11184,11185,11186,11187,11188,11189,11190,11191,11192,11193,11194,11195,11196,11197,11198,11199,11200,1105=>11174,8211=>5046,5048,8216=>5091,5092,8220=>5093,5094,8229=>5036,5035,5029,8242=>5098,8245=>5097,8251=>5102,8364=>5465,8451=>5191,8453=>5119,8457=>5192,8470=>11259,8481=>11260,8544=>5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,8560=>10916,10917,10918,10919,10920,10921,10922,10923,10924,10925,8592=>5172,5170,5173,5171,8598=>5174,5175,5177,5176,8632=>11202,11203,8679=>11201,8725=>5182,8730=>5138,8734=>5145,5158,5157,8739=>5179,8741=>5178,8745=>5154,5155,5162,8750=>5163,8756=>5165,5164,8786=>5146,8800=>5144,5147,8806=>5142,5143,8853=>5168,8857=>5169,8869=>5156,8895=>5159,9216=>5432,5433,5434,5435,5436,5437,5438,5439,5440,5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461,5462,5463,9249=>5464,9312=>10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,9332=>10906,10907,10908,10909,10910,10911,10912,10913,10914,10915,9472=>5236,9474=>5237,9484=>5239,9488=>5240,9492=>5241,9496=>5242,9500=>5234,9508=>5233,9516=>5232,9524=>5231,9532=>5230,9552=>18991,18990,18972,18981,18963,18974,18983,18965,18978,18987,18969,1898
0,18989,18971,18975,18984,18966,5250,18986,18968,18973,18982,18964,18979,18988,18970,18976,18985,18967,5243,5244,5246,5245,5255,5256,5257,5050,9601=>5215,5216,5217,5218,5219,5220,5221,5222,5229,5228,5227,5226,5225,5224,5223,9620=>5235,5238,9632=>5115,5114,9650=>5108,5107,9660=>5117,5116,9670=>5113,5112,9675=>5105,9678=>5109,5106,9698=>5251,5252,5254,5253,9733=>5111,5110,9792=>5166,9794=>5167,10045=>10965,11904=>11263,11908=>11264,11910=>11265,11266,11267,11914=>11268,11916=>11269,11270,11925=>11271,11932=>11272,11273,11941=>11274,11943=>11275,11946=>11276,11948=>11277,11950=>11278,11958=>11279,11964=>11280,11966=>11281,11974=>11282,11978=>11283,11980=>11284,11285,11983=>11286,11990=>11287,11288,11998=>11289,12003=>11290,12083=>10940,12288=>5024,5026,5027,5104,12293=>10959,10960,10961,5073,5074,5069,5070,5077,5078,5081,5082,5065,5066,5186,12308=>5061,5062,12317=>5095,5096,12321=>5278,5279,5280,5281,5282,5283,5284,5285,5286,12353=>10966,10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982,10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998,10999,11000,11001,11002,11003,11004,11005,11006,11007,11008,11009,11010,11011,11012,11013,11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029,11030,11031,11032,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044,11045,11046,11047,11048,12443=>11261,11262,10955,10956,12449=>11049,11050,11051,11052,11053,11054,11055,11056,11057,11058,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,11072,11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088,11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104,11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,11116,11117,11118,11119,11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,12540
=>10962,10953,10954,12549=>5390,5391,5392,5393,5394,5395,5396,5397,5398,5399,5400,5401,5402
/** The second code point of a two-character sequence, held back until the next call to nextCode(); zero when nothing is pending
*
* seekBack() also clears it when stepping back out of the middle of such a sequence */
protected $bufferedCode = 0;
/**
 * Decodes the character at the current byte position and advances past it
 *
 * Yields the decoded code point as an integer, false when the end of the
 * string is reached with no lead byte pending, or whatever self::err()
 * produces for the current error mode when the input is invalid
 */
public function nextCode() {
    $this->posChar++;
    if ($this->bufferedCode > 0) {
        // the previous call returned the first half of a two-character sequence
        // without consuming its bytes; hand out the second half now, step over
        // both bytes, and empty the buffer
        $pending = $this->bufferedCode;
        $this->bufferedCode = 0;
        $this->posByte += 2;
        return $pending;
    }
    $leadByte = 0x00;
    while (($byte = @$this->string[$this->posByte++]) !== "") {
        $byte = ord($byte);
        if ($leadByte == 0) {
            // no lead byte is pending, so this byte opens a new character
            if ($byte < 0x80) {
                return $byte;
            }
            if ($byte == 0x80 || $byte == 0xFF) {
                // neither value can open a two-byte sequence
                return self::err($this->errMode, [$this->posChar - 1, $this->posByte - 1]);
            }
            $leadByte = $byte;
            continue;
        }
        // a lead byte is pending, so this byte is examined as its trail
        $pointer = null;
        $inTrailRange = ($byte >= 0x40 && $byte <= 0x7E) || ($byte >= 0xA1 && $byte <= 0xFE);
        if ($inTrailRange) {
            $pointer = ($leadByte - 0x81) * 157 + ($byte - ($byte < 0x7F ? 0x40 : 0x62));
        }
        if (isset(self::TABLE_DOUBLES[$pointer])) {
            // this pointer stands for two characters: rewind over both bytes,
            // keep the second character for the next call, and return the first
            $pair = self::TABLE_DOUBLES[$pointer];
            $this->posByte -= 2;
            $this->bufferedCode = $pair[1];
            return $pair[0];
        }
        $code = self::TABLE_CODES[$pointer] ?? null;
        if (isset($code)) {
            return $code;
        }
        // no mapping exists for this pair of bytes
        $this->posErr = $this->posChar;
        if ($byte < 0x80) {
            // give the offending byte back so that the next call reads it afresh
            $this->posByte--;
            return self::err($this->errMode, [$this->posChar - 1, $this->posByte - 1]);
        }
        return self::err($this->errMode, [$this->posChar - 1, $this->posByte - 2]);
    }
    // the read which found the end of the string still advanced the pointer
    $this->posByte--;
    if ($leadByte == 0) {
        // the string ended on a character boundary
        $this->posChar--;
        return false;
    }
    // the string ended after a lead byte with no trail byte following
    $this->dirtyEOF = 1;
    $this->posErr = $this->posChar;
    return self::err($this->errMode, [$this->posChar - 1, $this->posByte - $this->dirtyEOF]);
}
/**
 * Produces the Big5 byte string for a single Unicode code point
 *
 * Code points below 128 are returned as the single byte of the same value.
 * Others are looked up in TABLE_ENC and emitted as a lead byte followed by
 * a trail byte. A code point absent from the table is passed to self::err()
 * using MODE_FATAL_ENC when $fatal is true and MODE_HTML otherwise
 *
 * @param int $codePoint The code point to encode
 * @param bool $fatal Selects which error mode is used for code points with no mapping
 * @throws EncoderException if $codePoint is negative or greater than 0x10FFFF
 */
public static function encode(int $codePoint, bool $fatal = true): string {
    if ($codePoint < 0 || $codePoint > 0x10FFFF) {
        throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
    }
    if ($codePoint < 128) {
        return chr($codePoint);
    }
    $index = self::TABLE_ENC[$codePoint] ?? null;
    if (!isset($index)) {
        return self::err($fatal ? self::MODE_FATAL_ENC : self::MODE_HTML, $codePoint);
    }
    // each lead byte covers a row of 157 pointers
    $column = $index % 157;
    $leadByte = (int) ($index / 157) + 0x81;
    // the first 0x3F columns map to trail bytes from 0x40 up; the rest are shifted by 0x62
    $trailByte = $column + ($column < 0x3F ? 0x40 : 0x62);
    return chr($leadByte).chr($trailByte);
}
/**
 * Implements backward seeking $distance characters
 *
 * A byte in the range 0x81 to 0xFE may be either the lead or the trail of a
 * sequence, so stepping back is not always decidable from the two preceding
 * bytes alone. When that happens the method scans further back to a byte
 * which certainly ends a character, uses the parity of the bytes in between
 * to find the alignment, and finally re-decodes the candidate sequence with
 * nextCode() to learn whether it yields one character or two
 *
 * @param int $distance The number of characters to step back over
 * @return int The part of $distance left over when the start of the string was reached; zero if the whole distance was covered
 */
protected function seekBack(int $distance): int {
    while ($distance > 0 && ($this->posByte > 0 || $this->bufferedCode > 0)) {
        $distance--;
        $this->posChar--;
        if ($this->bufferedCode > 0) { // we're in the middle of a special double-character sequence
            // clear the buffered character and continue
            $this->bufferedCode = 0;
            continue;
        }
        // go back one byte
        $b1 = ord(@$this->string[--$this->posByte]);
        if ($b1 < 0x40 || $b1 == 0x7F || $this->posByte == 0) { // these bytes never appear in sequences, and the first byte is necessarily the start of a sequence
            // the byte is a character
            continue;
        }
        // go back a second byte
        $b2 = ord(@$this->string[--$this->posByte]);
        if ($b2 < 0x81 || $b2 == 0xFF) { // these bytes never appear in the lead of a sequence
            // the first byte was a character
            $this->posByte += 1;
            continue;
        }
        // the second byte is part of a two-byte sequence, but it's unclear if it's the lead or trail byte
        $start = $this->posByte + 2;
        $pos = $this->posByte;
        // go back bytes until a byte which definitely ends a character, or the start of the string;
        // nextCode() accepts every byte from 0x81 through 0xFE as a lead, so the test here must use
        // the same bounds as the lead test above: stopping on 0x81 through 0x86 would miscount the run
        while ($pos > 0) {
            $b = ord(@$this->string[--$pos]);
            if ($b < 0x81 || $b == 0xFF) {
                $pos++;
                break;
            }
        }
        if (($start - $pos) % 2) { // the number of bytes is odd
            // the first byte was a character
            $this->posByte += 1;
            continue;
        }
        // the number of bytes is even
        // we have to consume the sequence to ascertain whether it is one character (valid) or two (invalid or special)
        $this->posChar--;
        $this->nextCode();
        if ($this->posByte >= $start) { // one character
            // go back two bytes
            $this->posByte -= 2;
        }
        // otherwise two characters: nothing more to do; byte position is already correct
    }
    return $distance;
}
}