Browse Source

Generalize handling of dirty EOF

span
J. King 4 years ago
parent
commit
fc44bb1415
  1. 7
      lib/Encoding/Big5.php
  2. 9
      lib/Encoding/EUCKR.php
  3. 8
      lib/Encoding/GBCommon.php
  4. 8
      lib/Encoding/GenericEncoding.php
  5. 8
      lib/Encoding/UTF16.php

7
lib/Encoding/Big5.php

File diff suppressed because one or more lines are too long

9
lib/Encoding/EUCKR.php

File diff suppressed because one or more lines are too long

8
lib/Encoding/GBCommon.php

@ -13,8 +13,6 @@ abstract class GBCommon implements StatelessEncoding {
const TABLE_RANGES = [0,36,38,45,50,81,89,95,96,100,103,104,105,109,126,133,148,172,175,179,208,306,307,308,309,310,311,312,313,341,428,443,544,545,558,741,742,749,750,805,819,820,7922,7924,7925,7927,7934,7943,7944,7945,7950,8062,8148,8149,8152,8164,8174,8236,8240,8262,8264,8374,8380,8381,8384,8388,8390,8392,8393,8394,8396,8401,8406,8416,8419,8424,8437,8439,8445,8482,8485,8496,8521,8603,8936,8946,9046,9050,9063,9066,9076,9092,9100,9108,9111,9113,9131,9162,9164,9218,9219,11329,11331,11334,11336,11346,11361,11363,11366,11370,11372,11375,11389,11682,11686,11687,11692,11694,11714,11716,11723,11725,11730,11736,11982,11989,12102,12336,12348,12350,12384,12393,12395,12397,12510,12553,12851,12962,12973,13738,13823,13919,13933,14080,14298,14585,14698,15583,15847,16318,16434,16438,16481,16729,17102,17122,17315,17320,17402,17418,17859,17909,17911,17915,17916,17936,17939,17961,18664,18703,18814,18962,19043,33469,33470,33471,33484,33485,33490,33497,33501,33505,33513,33520,33536,33550,37845,37921,37948,38029,38038,38064,38065,38066,38069,38075,38076,38078,39108,39109,39113,39114,39115,39116,39265,39394,39420,189000,1237576];
const TABLE_OFFSETS = [128,165,169,178,184,216,226,235,238,244,248,251,253,258,276,284,300,325,329,334,364,463,465,467,469,471,473,475,477,506,594,610,712,716,730,930,938,962,970,1026,1104,1106,8209,8215,8218,8222,8231,8241,8244,8246,8252,8365,8452,8454,8458,8471,8482,8556,8570,8596,8602,8713,8720,8722,8726,8731,8737,8740,8742,8748,8751,8760,8766,8777,8781,8787,8802,8808,8816,8854,8858,8870,8896,8979,9322,9372,9548,9588,9616,9622,9634,9652,9662,9672,9676,9680,9702,9735,9738,9793,9795,11906,11909,11913,11917,11928,11944,11947,11951,11956,11960,11964,11979,12284,12292,12312,12319,12330,12351,12436,12447,12535,12543,12586,12842,12850,12964,13200,13215,13218,13253,13263,13267,13270,13384,13428,13727,13839,13851,14617,14703,14801,14816,14964,15183,15471,15585,16471,16736,17208,17325,17330,17374,17623,17997,18018,18212,18218,18301,18318,18760,18811,18814,18820,18823,18844,18848,18872,19576,19620,19738,19887,40870,59244,59336,59367,59413,59417,59423,59431,59437,59443,59452,59460,59478,59493,63789,63866,63894,63976,63986,64016,64018,64021,64025,64034,64037,64042,65074,65093,65107,65112,65127,65132,65375,65510,null,65536,1114112];
protected $dirtyEOF = 0;
public function nextCode() {
$first = 0;
$second = 0;
@ -143,12 +141,6 @@ abstract class GBCommon implements StatelessEncoding {
/** Implements backward seeking $distance characters */
protected function seekBack(int $distance): int {
if ($this->posByte == $this->lenByte && $this->dirtyEOF > 0) {
// if we are at the end of the string and it did not terminate cleanly, go back the correct number of dirty bytes to seek through the last character
$this->posByte -= $this->dirtyEOF;
$distance--;
$this->posChar--;
}
while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;

8
lib/Encoding/GenericEncoding.php

@ -12,6 +12,7 @@ trait GenericEncoding {
protected $posChar = 0;
protected $lenByte = null;
protected $lenChar = null;
protected $dirtyEOF = 0;
protected $errMode = self::MODE_REPLACE;
protected $allowSurrogates = false;
@ -68,6 +69,13 @@ trait GenericEncoding {
if (!$this->posChar) {
return $distance;
}
if ($this->dirtyEOF > 0) {
// if we are at the end of the string and it did not terminate cleanly, go back the correct number of dirty bytes to seek through the last character
$this->posByte -= $this->dirtyEOF;
$this->dirtyEOF = 0;
$distance--;
$this->posChar--;
}
$mode = $this->errMode;
$this->errMode = self::MODE_NULL;
$out = $this->seekBack($distance);

8
lib/Encoding/UTF16.php

@ -8,8 +8,6 @@ namespace MensBeam\Intl\Encoding;
abstract class UTF16 implements Encoding {
use GenericEncoding;
protected $dirtyEOF = 0;
public function nextCode() {
$lead_b = null;
@ -80,12 +78,6 @@ abstract class UTF16 implements Encoding {
/** Implements backward seeking $distance characters */
protected function seekBack(int $distance): int {
if ($this->posByte >= $this->lenByte && $this->dirtyEOF > 0) {
// if we are at the end of the string and it did not terminate cleanly, go back the correct number of dirty bytes to seek through the last character
$this->posByte -= $this->dirtyEOF;
$distance--;
$this->posChar--;
}
while ($distance > 0 && $this->posByte > 0) {
$distance--;
$this->posChar--;

Loading…
Cancel
Save