From e2c41360013e28d024395428c95389493c08a109 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 10 Aug 2018 12:24:10 -0400 Subject: [PATCH] Change iterator to a set of generators Not only is this faster than a classical iterator (though still not as fast as a while loop), but it also offers the choice of characters or code points. --- lib/Encoding/UTF8.php | 22 +++++++---------- perf/perf.php | 9 +++++-- tests/cases/Encoding/TestUTF8.php | 39 +++++++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/lib/Encoding/UTF8.php b/lib/Encoding/UTF8.php index 946fc21..0c2955e 100644 --- a/lib/Encoding/UTF8.php +++ b/lib/Encoding/UTF8.php @@ -6,7 +6,7 @@ declare(strict_types=1); namespace MensBeam\Intl\Encoding; -class UTF8 implements \Iterator { +class UTF8 { const MODE_NULL = 0; const MODE_REPLACE = 1; const MODE_HTML = 2; @@ -31,20 +31,16 @@ class UTF8 implements \Iterator { $this->current = null; } - public function valid() { - return $this->posByte < $this->lenByte; - } - - public function current() { - return $this->current ?? ($this->current = $this->nextCode()); - } - - public function key() { - return isset($this->current) ? 
$this->posChar - 1 : $this->posChar; + public function chars(): \Generator { + while (($c = $this->nextChar()) !== "") { + yield ($this->posChar - 1) => $c; + } } - public function next() { - $this->current = null; + public function codes(): \Generator { + while (($c = $this->nextCode()) !== false) { + yield ($this->posChar - 1) => $c; + } } public function __construct(string $string, bool $fatal = false) { diff --git a/perf/perf.php b/perf/perf.php index 49e3c1e..47ba8b6 100644 --- a/perf/perf.php +++ b/perf/perf.php @@ -35,6 +35,11 @@ $tests = [ $c = $i->nextChar(); } }], + 'Character generator' => ["", function(string $text) { + $c = null; + $i = new UTF8($text); + foreach ($i->chars() as $c); + }], 'Intl code points' => ["intl", function(string $text) { $i = (function($text) { $i = \IntlBreakIterator::createCodePointInstance(); @@ -54,10 +59,10 @@ $tests = [ $p = $i->nextCode(); } }], - 'Code point iterator' => ["", function(string $text) { + 'Code point generator' => ["", function(string $text) { $c = null; $i = new UTF8($text); - foreach ($i as $c); + foreach ($i->codes() as $c); }], ]; diff --git a/tests/cases/Encoding/TestUTF8.php b/tests/cases/Encoding/TestUTF8.php index c8cfa3a..63b06c5 100644 --- a/tests/cases/Encoding/TestUTF8.php +++ b/tests/cases/Encoding/TestUTF8.php @@ -59,21 +59,50 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase { /** * @dataProvider provideStrings * @covers MensBeam\Intl\Encoding\UTF8::rewind - * @covers MensBeam\Intl\Encoding\UTF8::valid - * @covers MensBeam\Intl\Encoding\UTF8::current - * @covers MensBeam\Intl\Encoding\UTF8::key - * @covers MensBeam\Intl\Encoding\UTF8::next + * @covers MensBeam\Intl\Encoding\UTF8::chars + * @covers MensBeam\Intl\Encoding\UTF8::codes */ public function testIterateThroughAString(string $input, array $exp) { $out = []; $s = new UTF8($input); $a = 0; $this->assertTrue(true); // prevent risky test of empty string - foreach ($s as $index => $p) { + foreach ($s->codes() as $index => $p) { 
$this->assertSame($a, $index, "Character key at index $a reported incorrectly"); $this->assertSame($exp[$a], $p, "Character at index $a decoded incorrectly"); $a++; } + $a = 0; + foreach ($s->codes() as $p) { + $a++; + } + $this->assertSame(0, $a); + $s->rewind(); + foreach ($s->codes() as $p) { + $a++; + } + $this->assertSame(sizeof($exp), $a); + + $exp = array_map(function ($v) { + return \IntlChar::chr($v); + }, $exp); + + foreach ($s->chars() as $index => $p) { + $this->assertSame($a, $index, "Character key at index $a reported incorrectly"); + $this->assertSame(bin2hex($exp[$a]), bin2hex($p), "Character at index $a decoded incorrectly"); + $a++; + } + $a = 0; + foreach ($s->chars() as $p) { + $a++; + } + $this->assertSame(0, $a); + $s->rewind(); + foreach ($s->chars() as $p) { + $a++; + } + $this->assertSame(sizeof($exp), $a); + } /**