Browse Source

More safely back up state

labels
J. King 6 years ago
parent
commit
ccf1fe180a
  1. 39
      lib/Encoding/UTF8.php
  2. 34
      tests/cases/Encoding/TestUTF8.php

39
lib/Encoding/UTF8.php

@ -6,7 +6,7 @@
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class UTF8 implements \Iterator {
class UTF8 implements \Iterator {
protected $string;
protected $posByte = 0;
protected $posChar = 0;
@ -167,41 +167,35 @@ class UTF8 implements \Iterator {
/** Retrieves the next $num characters from the string, without advancing the character pointer
 *
 * Fewer than $num characters are returned if nextChr() yields an empty
 * string first; a $num of zero or less produces an empty string.
 *
 * @param int $num The maximum number of characters to retrieve
 * @return string The retrieved characters, concatenated in reading order
 */
public function peekChr(int $num = 1): string {
    $out = "";
    // Snapshot the pointer exactly once, through the shared helper, so that
    // any field added to stateSave() later is restored here as well
    $state = $this->stateSave();
    while ($num-- > 0 && ($b = $this->nextChr()) !== "") {
        $out .= $b;
    }
    // Put the pointer back where it was before peeking
    $this->stateApply($state);
    return $out;
}
/** Retrieves the next $num code points from the string, without advancing the character pointer
 *
 * Fewer than $num code points are returned if nextOrd() yields false
 * first; a $num of zero or less produces an empty array.
 *
 * @param int $num The maximum number of code points to retrieve
 * @return array The values returned by nextOrd(), in reading order
 */
public function peekOrd(int $num = 1): array {
    $out = [];
    // Snapshot the pointer exactly once, through the shared helper, so that
    // any field added to stateSave() later is restored here as well
    $state = $this->stateSave();
    while ($num-- > 0 && ($b = $this->nextOrd()) !== false) {
        $out[] = $b;
    }
    // Put the pointer back where it was before peeking
    $this->stateApply($state);
    return $out;
}
/** Calculates the length of the string in code points
 *
 * Note that this involves processing to the end of the string. The result
 * is stored in $this->lenChar, so the traversal happens at most once; the
 * character pointer is left where it was before the call.
 *
 * @return int The number of code points in the string
 */
public function len(): int {
    if (!isset($this->lenChar)) {
        // The snapshot must be taken BEFORE walking the string; taking it
        // afterwards would "restore" the pointer to the end of the string
        $state = $this->stateSave();
        // A single pass is enough: advance until nextOrd() reports the end
        while ($this->nextOrd() !== false);
        // Having consumed everything, the character position is the length
        $this->lenChar = $this->posChar;
        $this->stateApply($state);
    }
    return $this->lenChar;
}
@ -228,6 +222,19 @@ class UTF8 implements \Iterator {
}
}
/** Captures the current pointer position so that it can be restored later
 *
 * @return array The current values of the posChar and posByte properties, keyed by property name
 */
protected function stateSave(): array {
    $snapshot = [];
    $snapshot['posChar'] = $this->posChar;
    $snapshot['posByte'] = $this->posByte;
    return $snapshot;
}
/** Restores a previously captured state
 *
 * Each key of $state is treated as the name of a property of this object,
 * and the property is assigned the corresponding value.
 *
 * @param array $state A map of property names to values, such as the one produced by stateSave()
 */
protected function stateApply(array $state) {
    foreach ($state as $property => $saved) {
        $this->{$property} = $saved;
    }
}
/** Returns the UTF-8 encoding of $codePoint
*
* If $codePoint is less than 0 or greater than 1114111, an empty string is returned

34
tests/cases/Encoding/TestUTF8.php

@ -9,7 +9,7 @@ namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF8;
class TestUTF8 extends \PHPUnit\Framework\TestCase {
/**
* @covers MensBeam\Intl\Encoding\UTF8::chr
*/
@ -23,7 +23,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame("", UTF8::chr(\PHP_INT_MAX));
$this->assertSame("", UTF8::chr(\PHP_INT_MIN));
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::__construct
@ -37,7 +37,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
}
$this->assertEquals($exp, $out);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::__construct
@ -54,7 +54,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
}
$this->assertEquals($exp, $out);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::rewind
@ -77,7 +77,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$a++;
}
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::sync
@ -93,7 +93,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame(++$a, $s->posChr(), "Character position should be $a");
}
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::seek
* @covers MensBeam\Intl\Encoding\UTF8::posChr
@ -122,32 +122,32 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame(1, $s->seek(-1));
$this->assertSame(0, $s->posChr());
$this->assertSame(0, $s->posByte());
$this->assertSame(0, $s->seek(1));
$this->assertSame(1, $s->posChr());
$this->assertSame(1, $s->posByte());
$this->assertSame(0, $s->seek(2));
$this->assertSame(3, $s->posChr());
$this->assertSame(6, $s->posByte());
$this->assertSame(0, $s->seek(4));
$this->assertSame(7, $s->posChr());
$this->assertSame(20, $s->posByte());
$this->assertSame(1, $s->seek(1));
$this->assertSame(7, $s->posChr());
$this->assertSame(20, $s->posByte());
$this->assertSame(0, $s->seek(-3));
$this->assertSame(4, $s->posChr());
$this->assertSame(10, $s->posByte());
$this->assertSame(6, $s->seek(-10));
$this->assertSame(0, $s->posChr());
$this->assertSame(0, $s->posByte());
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::posChr
* @covers MensBeam\Intl\Encoding\UTF8::posByte
@ -177,7 +177,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame(1, $s->posChr());
$this->assertSame(1, $s->posByte());
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::peekChr
*/
@ -218,7 +218,7 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame(5, $s->posChr());
$this->assertSame(13, $s->posByte());
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::peekOrd
*/
@ -259,10 +259,12 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertSame(5, $s->posChr());
$this->assertSame(13, $s->posByte());
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::len
* @covers MensBeam\Intl\Encoding\UTF8::stateSave
* @covers MensBeam\Intl\Encoding\UTF8::stateApply
*/
public function testGetStringLength(string $input, array $points) {
$s = new UTF8($input);

Loading…
Cancel
Save