diff --git a/lib/Encoding/UTF8.php b/lib/Encoding/UTF8.php
index 42b4940..cf528cc 100644
--- a/lib/Encoding/UTF8.php
+++ b/lib/Encoding/UTF8.php
@@ -6,15 +6,54 @@ declare(strict_types=1);
 
 namespace MensBeam\Intl\Encoding;
 
-class UTF8 {
+class UTF8 implements \Iterator {
     protected $string;
     protected $posByte = 0;
     protected $posChar = 0;
-    protected $length = null;
+    protected $lenByte = null;
+    protected $lenChar = null;
+    protected $current;
 
+    /** Iterator: moves back to the start of the string and discards any cached character */
+    public function rewind() {
+        $this->posByte = 0;
+        $this->posChar = 0;
+        $this->current = null;
+    }
+
+    /**
+     * Iterator: reports whether there is a character at the current position
+     *
+     * A character cached by current() has already been consumed from the byte
+     * stream, so it still counts even when the byte cursor is at the end
+     */
+    public function valid(): bool {
+        return ($this->current ?? "") !== "" || $this->posByte < $this->lenByte;
+    }
+
+    /** Iterator: returns the character at the current position, reading it with nextChr() on first access and caching it */
+    public function current() {
+        return $this->current ?? ($this->current = $this->nextChr());
+    }
+
+    /** Iterator: returns the character index of the current position; the cursor is already past a cached character, hence the -1 */
+    public function key(): int {
+        return isset($this->current) ? $this->posChar - 1 : $this->posChar;
+    }
+
+    /** Iterator: moves on to the next character */
+    public function next() {
+        if ($this->current === null) {
+            // current() was not called at this position, so the character has
+            // not been consumed yet; consume it so that the cursor advances
+            $this->nextChr();
+        }
+        $this->current = null;
+    }
 
     public function __construct(string $string) {
         $this->string = $string;
+        $this->lenByte = strlen($string);
     }
 
     public function posByte(): int {
@@ -171,14 +210,14 @@ class UTF8 {
      * Note that this involves processing to the end of the string
      */
     public function len(): int {
-        return $this->length ?? (function() {
+        return $this->lenChar ?? (function() {
             $pC = $this->posChar;
             $pB = $this->posByte;
             while ($this->nextChr() !== "");
-            $this->length = $this->posChar;
+            $this->lenChar = $this->posChar;
             $this->posChar = $pC;
             $this->posByte = $pB;
-            return $this->length;
+            return $this->lenChar;
         })();
     }
 
diff --git a/perf/perf.php b/perf/perf.php
index 83ff4ba..cbc6cc6 100644
--- a/perf/perf.php
+++ b/perf/perf.php
@@ -4,7 +4,7 @@
  * See LICENSE and AUTHORS files for details */
 declare(strict_types=1);
 
-namespace MensBeam\UTF8;
+namespace MensBeam\Intl\Encoding;
 
 require __DIR__."/../tests/bootstrap.php";
 
@@ -30,7 +30,12 @@ $tests = [
     }],
+    'Native iterator' => ["", function(string $text) {
+        foreach (new UTF8($text) as $c) {
+            // the loop body is intentionally empty: only the traversal is exercised
+        }
+    }],
     'Native characters' => ["", function(string $text) {
         $c = null;
-        $i = new \MensBeam\UTF8\UTF8($text);
+        $i = new UTF8($text);
         while ($c !== "") {
             $c = $i->nextChr();
         }
@@ -49,7 +54,7 @@ $tests = [
     }],
     'Native code points' => ["", function(string $text) {
         $p = null;
-        $i = new \MensBeam\UTF8\UTF8($text);
+        $i = new UTF8($text);
         while ($p !== false) {
             $p = $i->nextOrd();
         }
diff --git a/tests/cases/Encoding/TestUTF8.php b/tests/cases/Encoding/TestUTF8.php
index 867a358..2b33988 100644
--- a/tests/cases/Encoding/TestUTF8.php
+++ b/tests/cases/Encoding/TestUTF8.php
@@ -55,6 +55,29 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
         $this->assertEquals($exp, $out);
     }
 
+    /**
+     * @dataProvider provideStrings
+     * @covers MensBeam\Intl\Encoding\UTF8::rewind
+     * @covers MensBeam\Intl\Encoding\UTF8::valid
+     * @covers MensBeam\Intl\Encoding\UTF8::current
+     * @covers MensBeam\Intl\Encoding\UTF8::key
+     * @covers MensBeam\Intl\Encoding\UTF8::next
+     */
+    public function testIterateThroughAString(string $input, array $exp) {
+        $exp = array_map(function ($v) {
+            return \IntlChar::chr($v);
+        }, $exp);
+        $s = new UTF8($input);
+        $a = 0;
+        foreach ($s as $index => $c) {
+            $this->assertSame($a, $index, "Character key at index $a reported incorrectly");
+            $this->assertSame(bin2hex($exp[$a]), bin2hex($c), "Character at index $a decoded incorrectly");
+            $a++;
+        }
+        // catches an iterator that stops early, and gives the empty string an assertion too
+        $this->assertSame(count($exp), $a, "Iterator did not yield the expected number of characters");
+    }
+
     /**
      * @dataProvider provideStrings
      * @covers MensBeam\Intl\Encoding\UTF8::sync