Browse Source

Add basic iterator implementation

labels
J. King 6 years ago
parent
commit
8f7a7ed49e
  1. 34
      lib/Encoding/UTF8.php
  2. 13
      perf/perf.php
  3. 23
      tests/cases/Encoding/TestUTF8.php

34
lib/Encoding/UTF8.php

@ -6,15 +6,39 @@
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class UTF8 {
class UTF8 implements \Iterator {
protected $string;
protected $posByte = 0;
protected $posChar = 0;
protected $length = null;
protected $lenByte = null;
protected $lenChar = null;
protected $current;
/**
 * Resets the iterator state (part of the \Iterator contract).
 *
 * Sets both the byte offset and the character offset back to zero and
 * discards any character buffered by current().
 */
public function rewind() {
    $this->current = null;
    $this->posByte = $this->posChar = 0;
}
/**
 * Reports whether the iterator currently points at a character (part of the \Iterator contract).
 *
 * @return bool True while a character is buffered or bytes remain to be read
 */
public function valid() {
    // current() reads lazily through nextChr(), which moves the byte offset past the
    // character it returns; a buffered non-empty character therefore means the position
    // is still valid even if the byte offset has already reached the end of the string.
    // The empty string is excluded because nextChr() yields "" once the string is exhausted.
    if ($this->current !== null && $this->current !== "") {
        return true;
    }
    return $this->posByte < $this->lenByte;
}
/**
 * Returns the character at the iterator's position (part of the \Iterator contract).
 *
 * The character is read through nextChr() the first time it is requested and
 * then buffered, so repeated calls return the same value until the buffer is
 * cleared by next() or rewind().
 */
public function current() {
    if ($this->current === null) {
        $this->current = $this->nextChr();
    }
    return $this->current;
}
/**
 * Returns the character index of the iterator's position (part of the \Iterator contract).
 *
 * When a character is buffered the character offset has already moved past
 * it, so the index reported is one less than the offset.
 */
public function key() {
    if ($this->current !== null) {
        return $this->posChar - 1;
    }
    return $this->posChar;
}
/**
 * Moves the iterator to the next character (part of the \Iterator contract).
 *
 * Characters are consumed lazily by current(). If current() was not called for
 * this position, the pending character is consumed here so that consecutive
 * calls to next() still advance through the string.
 */
public function next() {
    if ($this->current === null && $this->posByte < $this->lenByte) {
        // nothing was buffered for this position: skip over the unread character
        $this->nextChr();
    }
    $this->current = null;
}
/**
 * Wraps a string for decoding.
 *
 * @param string $string The string to read; its length in bytes is recorded up front
 */
public function __construct(string $string) {
    $this->lenByte = strlen($string);
    $this->string = $string;
}
public function posByte(): int {
@ -171,14 +195,14 @@ class UTF8 {
* Note that this involves processing to the end of the string
*/
public function len(): int {
// Returns the cached character count if one exists, otherwise computes it once.
// NOTE(review): the statements below come in near-identical pairs that differ only in
// the property name ($this->length vs $this->lenChar); this looks like the before/after
// lines of a rendered diff hunk shown without +/- markers -- confirm against the real file.
return $this->length ?? (function() {
return $this->lenChar ?? (function() {
// remember the current character and byte offsets so they can be restored afterwards
$pC = $this->posChar;
$pB = $this->posByte;
// read characters until nextChr() returns the empty string
while ($this->nextChr() !== "");
// the character offset reached by the loop is stored as the length
$this->length = $this->posChar;
$this->lenChar = $this->posChar;
// put the offsets back where they were before counting
$this->posChar = $pC;
$this->posByte = $pB;
return $this->length;
return $this->lenChar;
})();
}

13
perf/perf.php

@ -4,7 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\UTF8;
namespace MensBeam\Intl\Encoding;
require __DIR__."/../tests/bootstrap.php";
@ -30,7 +30,14 @@ $tests = [
}],
'Native characters' => ["", function(string $text) {
$c = null;
$i = new \MensBeam\UTF8\UTF8($text);
$i = new UTF8($text);
while ($c !== "") {
$c = $i->nextChr();
}
}],
'Native iterator' => ["", function(string $text) {
$c = null;
$i = new UTF8($text);
while ($c !== "") {
$c = $i->nextChr();
}
@ -49,7 +56,7 @@ $tests = [
}],
'Native code points' => ["", function(string $text) {
$p = null;
$i = new \MensBeam\UTF8\UTF8($text);
$i = new UTF8($text);
while ($p !== false) {
$p = $i->nextOrd();
}

23
tests/cases/Encoding/TestUTF8.php

@ -55,6 +55,29 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
$this->assertEquals($exp, $out);
}
/**
 * Iterates over a string with foreach and checks every key and character,
 * as well as the total number of characters produced.
 *
 * @dataProvider provideStrings
 * @covers MensBeam\Intl\Encoding\UTF8::rewind
 * @covers MensBeam\Intl\Encoding\UTF8::valid
 * @covers MensBeam\Intl\Encoding\UTF8::current
 * @covers MensBeam\Intl\Encoding\UTF8::key
 * @covers MensBeam\Intl\Encoding\UTF8::next
 */
public function testIterateThroughAString(string $input, array $exp) {
    // convert the expected code points to their character representation
    $exp = array_map(function ($v) {
        return \IntlChar::chr($v);
    }, $exp);
    $s = new UTF8($input);
    $a = 0;
    foreach ($s as $index => $c) {
        // fail with a clear message rather than an undefined-offset notice if the iterator overruns
        $this->assertArrayHasKey($a, $exp, "Iterator yielded more characters than expected");
        $this->assertSame($a, $index, "Character key at index $a reported incorrectly");
        $this->assertSame(bin2hex($exp[$a]), bin2hex($c), "Character at index $a decoded incorrectly");
        $a++;
    }
    // an iterator which stops early would otherwise pass unnoticed; this assertion also
    // guarantees at least one assertion is made for the empty string
    $this->assertSame(count($exp), $a, "Iterator yielded the wrong number of characters");
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::sync

Loading…
Cancel
Save