6176]; const TABLE_RANGES = [0,36,38,45,50,81,89,95,96,100,103,104,105,109,126,133,148,172,175,179,208,306,307,308,309,310,311,312,313,341,428,443,544,545,558,741,742,749,750,805,819,820,7922,7924,7925,7927,7934,7943,7944,7945,7950,8062,8148,8149,8152,8164,8174,8236,8240,8262,8264,8374,8380,8381,8384,8388,8390,8392,8393,8394,8396,8401,8406,8416,8419,8424,8437,8439,8445,8482,8485,8496,8521,8603,8936,8946,9046,9050,9063,9066,9076,9092,9100,9108,9111,9113,9131,9162,9164,9218,9219,11329,11331,11334,11336,11346,11361,11363,11366,11370,11372,11375,11389,11682,11686,11687,11692,11694,11714,11716,11723,11725,11730,11736,11982,11989,12102,12336,12348,12350,12384,12393,12395,12397,12510,12553,12851,12962,12973,13738,13823,13919,13933,14080,14298,14585,14698,15583,15847,16318,16434,16438,16481,16729,17102,17122,17315,17320,17402,17418,17859,17909,17911,17915,17916,17936,17939,17961,18664,18703,18814,18962,19043,33469,33470,33471,33484,33485,33490,33497,33501,33505,33513,33520,33536,33550,37845,37921,37948,38029,38038,38064,38065,38066,38069,38075,38076,38078,39108,39109,39113,39114,39115,39116,39265,39394,39420,189000,1237576]; const TABLE_OFFSETS = 
[128,165,169,178,184,216,226,235,238,244,248,251,253,258,276,284,300,325,329,334,364,463,465,467,469,471,473,475,477,506,594,610,712,716,730,930,938,962,970,1026,1104,1106,8209,8215,8218,8222,8231,8241,8244,8246,8252,8365,8452,8454,8458,8471,8482,8556,8570,8596,8602,8713,8720,8722,8726,8731,8737,8740,8742,8748,8751,8760,8766,8777,8781,8787,8802,8808,8816,8854,8858,8870,8896,8979,9322,9372,9548,9588,9616,9622,9634,9652,9662,9672,9676,9680,9702,9735,9738,9793,9795,11906,11909,11913,11917,11928,11944,11947,11951,11956,11960,11964,11979,12284,12292,12312,12319,12330,12351,12436,12447,12535,12543,12586,12842,12850,12964,13200,13215,13218,13253,13263,13267,13270,13384,13428,13727,13839,13851,14617,14703,14801,14816,14964,15183,15471,15585,16471,16736,17208,17325,17330,17374,17623,17997,18018,18212,18218,18301,18318,18760,18811,18814,18820,18823,18844,18848,18872,19576,19620,19738,19887,40870,59244,59336,59367,59413,59417,59423,59431,59437,59443,59452,59460,59478,59493,63789,63866,63894,63976,63986,64016,64018,64021,64025,64034,64037,64042,65074,65093,65107,65112,65127,65132,65375,65510,null,65536,1114112];

    /**
     * Lazily-built reverse lookup (code point => pointer), created by
     * flipping TABLE_CODES the first time encode() misses TABLE_POINTERS
     */
    protected static $pointerCache;

    /**
     * Decodes the next character from the string and advances the byte and
     * character positions
     *
     * Sequences are one byte (below 0x80, or 0x80 itself), two bytes (lead
     * 0x81-0xFE, trail 0x40-0x7E or 0x80-0xFE) or four bytes (lead, digit,
     * 0x81-0xFE, digit).
     *
     * @return int|false|mixed The decoded code point, false on a clean end of
     *                         input, or whatever errDec() returns for invalid input
     */
    public function nextCode() {
        $first = 0;
        $second = 0;
        $third = 0;
        $this->posChar++;
        // reading past the end of the string yields an empty string (the warning is suppressed)
        while (($b = @$this->string[$this->posByte++]) !== "") {
            $b = ord($b);
            if ($first === 0) {
                if ($b < 0x80) {
                    return $b;
                } elseif ($b === 0x80) {
                    return 0x20AC;
                } elseif ($b > 0x80 && $b < 0xFF) {
                    $first = $b;
                    continue;
                } else {
                    return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
                }
            } elseif ($second === 0) {
                if ($b > 0x2F && $b < 0x3A) {
                    // a digit after the lead byte signals a four-byte sequence
                    $second = $b;
                    continue;
                } else {
                    $codePoint = null;
                    // valid two-byte trails are 0x40-0x7E and 0x80-0xFE; anything below 0x40
                    //   would produce a negative trail index and alias the previous lead's row
                    if (($b > 0x3F && $b < 0x7F) || ($b > 0x7F && $b < 0xFF)) {
                        $offset = ($b < 0x7F) ? 0x40 : 0x41;
                        $pointer = ($first - 0x81) * 190 + ($b - $offset);
                        $codePoint = self::TABLE_CODES[$pointer] ?? null;
                    }
                    if (!is_null($codePoint)) {
                        return $codePoint;
                    } elseif ($b < 0x80) {
                        // step back so the ASCII byte is read again as its own character
                        return $this->errDec($this->errMode, $this->posChar - 1, --$this->posByte - 1);
                    } else {
                        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
                    }
                }
            } elseif ($third === 0) {
                if ($b > 0x80 && $b < 0xFF) {
                    $third = $b;
                    continue;
                } else {
                    // rewind so the second byte and this byte are read again
                    $this->posByte -= 2;
                    return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
                }
            } else {
                if ($b > 0x2F && $b < 0x3A) {
                    // look up code point
                    $pointer = (($first - 0x81) * (10 * 126 * 10)) + (($second - 0x30) * (10 * 126)) + (($third - 0x81) * 10) + $b - 0x30;
                    if ($pointer === 7457) {
                        return 0xE7C7;
                    }
                    // find the range containing the pointer; a null offset marks an unmapped range
                    $rangeCount = count(self::TABLE_RANGES);
                    for ($a = 1; $a < $rangeCount; $a++) {
                        if ($pointer < self::TABLE_RANGES[$a]) {
                            $offset = self::TABLE_RANGES[$a - 1];
                            $codePointOffset = self::TABLE_OFFSETS[$a - 1];
                            break;
                        }
                    }
                    if (isset($codePointOffset)) {
                        return $codePointOffset + $pointer - $offset;
                    } else {
                        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 4);
                    }
                } else {
                    // rewind so the second, third, and current bytes are read again
                    $this->posByte -= 3;
                    return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 1);
                }
            }
        }
        // the failed read above still advanced the byte position; undo that
        $this->posByte--;
        if (($first + $second + $third) == 0) {
            // clean EOF
            $this->posChar--;
            return false;
        } else {
            // dirty EOF
            return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - ($third ? 3 : ($second ? 2 : 1)));
        }
    }

    /**
     * Encodes a single code point as a byte string
     *
     * @param int  $codePoint The code point to encode
     * @param bool $fatal     Passed, negated, to errEnc() when the code point cannot be encoded
     *
     * @return string The encoded bytes, or the result of errEnc() for unencodable code points
     *
     * @throws EncoderException If the code point is outside the range 0..0x10FFFF
     */
    public static function encode(int $codePoint, bool $fatal = true): string {
        if ($codePoint < 0 || $codePoint > 0x10FFFF) {
            throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
        } elseif ($codePoint < 128) {
            return chr($codePoint);
        } elseif ($codePoint == 0xE5E5) {
            return self::errEnc(!$fatal, $codePoint);
        } elseif (static::GBK && $codePoint == 0x20AC) {
            return "\x80";
        } else {
            // TABLE_POINTERS is consulted first; the flipped TABLE_CODES is built once and cached
            $pointer = self::TABLE_POINTERS[$codePoint] ?? (self::$pointerCache ?? (self::$pointerCache = array_flip(self::TABLE_CODES)))[$codePoint] ?? null;
            if (isset($pointer)) {
                // two-byte sequence
                $lead = (int) ($pointer / 190) + 0x81;
                $trail = $pointer % 190;
                $offset = ($trail < 0x3F) ? 0x40 : 0x41;
                return chr($lead).chr($trail + $offset);
            } elseif (static::GBK) {
                // no four-byte sequences are produced when GBK is set
                return self::errEnc(!$fatal, $codePoint);
            } else {
                if ($codePoint == 0xE7C7) {
                    $pointer = 7457;
                } else {
                    // find the last range that starts at or below the code point
                    // the null placeholder in TABLE_OFFSETS must be skipped explicitly: PHP compares
                    //   an integer against null as booleans, so a plain ">=" test always passes it
                    //   and would select the placeholder for U+FFE6 through U+FFFF
                    $index = 0;
                    $rangeCount = count(self::TABLE_OFFSETS);
                    for ($i = 1; $i < $rangeCount; $i++) {
                        $rangeStart = self::TABLE_OFFSETS[$i];
                        if ($rangeStart === null) {
                            continue;
                        }
                        if ($codePoint < $rangeStart) {
                            break;
                        }
                        $index = $i;
                    }
                    $offset = self::TABLE_OFFSETS[$index];
                    $pointer_offset = self::TABLE_RANGES[$index];
                    $pointer = $pointer_offset + $codePoint - $offset;
                }
                // split the pointer into the four bytes of the sequence
                $byte1 = (int) ($pointer / (10 * 126 * 10)) + 0x81;
                $pointer %= (10 * 126 * 10);
                $byte2 = (int) ($pointer / (10 * 126)) + 0x30;
                $pointer %= (10 * 126);
                $byte3 = (int) ($pointer / 10) + 0x81;
                $byte4 = ($pointer % 10) + 0x30;
                return chr($byte1).chr($byte2).chr($byte3).chr($byte4);
            }
        }
    }

    /**
     * Implements backward seeking $distance characters
     *
     * Because trail bytes overlap with ASCII and lead bytes, the start of the
     * previous character is determined heuristically from the bytes before
     * the current position and from the recorded error marks.
     *
     * @param int $distance The number of characters to move back
     *
     * @return int The number of characters which could not be skipped because
     *             the start of the string was reached
     */
    protected function seekBack(int $distance): int {
        while ($distance > 0 && $this->posByte > 0) {
            $distance--;
            $this->posChar--;
            if ($this->posByte === $this->errMark) {
                // the previous character was malformed
                // move to the correct sync position, pop the error stack, and continue
                $this->posByte = $this->errSync;
                list($this->errMark, $this->errSync) = array_pop($this->errStack);
                continue;
            }
            // go back one byte
            $b1 = ord(@$this->string[--$this->posByte]);
            if ($b1 > 0x80) {
                // only GBK characters end in high bytes
                // the preceding byte starts the character
                $this->posByte--;
                continue;
            } elseif ($b1 < 0x30 || $this->errMark === $this->posByte || $this->posByte === 0) {
                // the byte is unambiguously a single-byte character
                // the byte is a character
                continue;
            } elseif ($b1 >= 0x30 && $b1 <= 0x39) {
                // this can either be the last byte of a four-byte gb18030 character or an ASCII character
                if ($this->posByte < 3) {
                    // there are not enough bytes left for this to be a four-byte sequence
                    // the byte is a character
                    continue;
                } elseif ($this->errMark > ($this->posByte - 3)) {
                    // there was an error in what would otherwise be the four-byte sequence
                    // the byte is a character
                    continue;
                }
                // go back a second byte
                $b2 = ord(@$this->string[$this->posByte - 1]);
                if ($b2 > 0x80) {
                    // go back a third byte
                    $b3 = ord(@$this->string[$this->posByte - 2]);
                    if ($b3 >= 0x30 && $b3 <= 0x39) {
                        // the next byte starts the character
                        $this->posByte -= 3;
                        continue;
                    }
                }
                // if the byte pattern doesn't match the first byte is a character
                continue;
            } else {
                // this can either be the trail of a two-byte GBK character, or a single-byte character
                // go back a second byte
                $b2 = ord(@$this->string[--$this->posByte]);
                if ($b2 < 0x81) {
                    // these bytes never appear in the lead of a sequence
                    // the first byte was a character
                    $this->posByte += 1;
                    continue;
                } else {
                    // the second byte is part of a two-byte sequence, but it's unclear if it's the lead or trail byte
                    $start = $this->posByte + 2;
                    $pos = $this->posByte;
                    // go back bytes until an error mark, an ASCII byte, or start of string
                    while ($pos > 0 && $pos > $this->errMark) {
                        $b = ord(@$this->string[--$pos]);
                        if ($b < 0x81) {
                            $pos++;
                            break;
                        }
                    }
                    if (($start - $pos) % 2) {
                        // the number of bytes is odd
                        // the first byte was a character
                        $this->posByte += 1;
                        continue;
                    } else {
                        // the number of bytes is even
                        // the second byte was a character
                        continue;
                    }
                }
            }
        }
        return $distance;
    }
}