Browse Source

Change iterator to a set of generators

Not only is this faster than a classical iterator (though still not as
fast as a while loop), but it also offers the choice of characters
or code points.
labels
J. King 6 years ago
parent
commit
e2c4136001
  1. 22
      lib/Encoding/UTF8.php
  2. 9
      perf/perf.php
  3. 39
      tests/cases/Encoding/TestUTF8.php

22
lib/Encoding/UTF8.php

@ -6,7 +6,7 @@
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class UTF8 implements \Iterator {
class UTF8 {
const MODE_NULL = 0;
const MODE_REPLACE = 1;
const MODE_HTML = 2;
@ -31,20 +31,16 @@ class UTF8 implements \Iterator {
$this->current = null;
}
public function valid() {
return $this->posByte < $this->lenByte;
}
public function current() {
return $this->current ?? ($this->current = $this->nextCode());
}
public function key() {
return isset($this->current) ? $this->posChar - 1 : $this->posChar;
/**
 * Generator over the string's characters.
 *
 * Yields the value of each successive nextChar() call, stopping at the
 * first call that returns an empty string. Iteration starts from the
 * object's current position and advances it.
 *
 * @return \Generator yields ($this->posChar - 1) => character string
 */
public function chars(): \Generator {
while (($c = $this->nextChar()) !== "") {
// presumably nextChar() has already advanced posChar, so subtracting one
// gives the index of the character just read — confirm against nextChar()
yield ($this->posChar - 1) => $c;
}
}
public function next() {
$this->current = null;
/**
 * Generator over the string's code points.
 *
 * Yields the value of each successive nextCode() call, stopping at the
 * first call that returns false. Iteration starts from the object's
 * current position and advances it.
 *
 * @return \Generator yields ($this->posChar - 1) => code point
 */
public function codes(): \Generator {
while (($c = $this->nextCode()) !== false) {
// presumably nextCode() has already advanced posChar, so subtracting one
// gives the index of the code point just read — confirm against nextCode()
yield ($this->posChar - 1) => $c;
}
}
public function __construct(string $string, bool $fatal = false) {

9
perf/perf.php

@ -35,6 +35,11 @@ $tests = [
$c = $i->nextChar();
}
}],
'Character generator' => ["", function(string $text) {
$c = null;
$i = new UTF8($text);
foreach ($i->chars() as $c);
}],
'Intl code points' => ["intl", function(string $text) {
$i = (function($text) {
$i = \IntlBreakIterator::createCodePointInstance();
@ -54,10 +59,10 @@ $tests = [
$p = $i->nextCode();
}
}],
'Code point iterator' => ["", function(string $text) {
'Code point generator' => ["", function(string $text) {
$c = null;
$i = new UTF8($text);
foreach ($i as $c);
foreach ($i->codes() as $c);
}],
];

39
tests/cases/Encoding/TestUTF8.php

@ -59,21 +59,50 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::rewind
* @covers MensBeam\Intl\Encoding\UTF8::valid
* @covers MensBeam\Intl\Encoding\UTF8::current
* @covers MensBeam\Intl\Encoding\UTF8::key
* @covers MensBeam\Intl\Encoding\UTF8::next
* @covers MensBeam\Intl\Encoding\UTF8::chars
* @covers MensBeam\Intl\Encoding\UTF8::codes
*/
// Iterates the input with codes() and then chars(), checking keys, values,
// exhaustion after a full pass, and a full count again after rewind().
public function testIterateThroughAString(string $input, array $exp) {
$out = [];
$s = new UTF8($input);
$a = 0;
$this->assertTrue(true); // prevent risky test of empty string
foreach ($s as $index => $p) {
foreach ($s->codes() as $index => $p) {
$this->assertSame($a, $index, "Character key at index $a reported incorrectly");
$this->assertSame($exp[$a], $p, "Character at index $a decoded incorrectly");
$a++;
}
// A second pass without rewind() is expected to yield nothing.
$a = 0;
foreach ($s->codes() as $p) {
$a++;
}
$this->assertSame(0, $a);
// After rewind() the whole string is expected to be available again.
$s->rewind();
foreach ($s->codes() as $p) {
$a++;
}
$this->assertSame(sizeof($exp), $a);
// Convert the expected code points to character strings for the chars() checks.
$exp = array_map(function ($v) {
return \IntlChar::chr($v);
}, $exp);
// NOTE(review): the codes() loop above has just consumed the string and $a
// still equals sizeof($exp) here. If chars() shares its position with codes()
// (presumably it does — confirm), this loop yields nothing and the two
// assertions inside it never run. A $s->rewind() and $a = 0 appear to be
// missing before this loop.
foreach ($s->chars() as $index => $p) {
$this->assertSame($a, $index, "Character key at index $a reported incorrectly");
$this->assertSame(bin2hex($exp[$a]), bin2hex($p), "Character at index $a decoded incorrectly");
$a++;
}
// A pass over an already-consumed string is expected to yield nothing.
$a = 0;
foreach ($s->chars() as $p) {
$a++;
}
$this->assertSame(0, $a);
// After rewind() chars() is expected to yield every character again.
$s->rewind();
foreach ($s->chars() as $p) {
$a++;
}
$this->assertSame(sizeof($exp), $a);
}
/**

Loading…
Cancel
Save