Browse Source

Make the iterator iterate over code points rather than characters

Also fix performance measurement for the iterator; it was all wrong.
labels
J. King 6 years ago
parent
commit
7f2c11dcc2
  1. 2
      lib/Encoding/UTF8.php
  2. 12
      perf/perf.php
  3. 7
      tests/cases/Encoding/TestUTF8.php

2
lib/Encoding/UTF8.php

@@ -36,7 +36,7 @@ class UTF8 implements \Iterator {
}
public function current() {
return $this->current ?? ($this->current = $this->nextChar());
return $this->current ?? ($this->current = $this->nextCode());
}
public function key() {

12
perf/perf.php

@@ -35,13 +35,6 @@ $tests = [
$c = $i->nextChar();
}
}],
'Native iterator' => ["", function(string $text) {
$c = null;
$i = new UTF8($text);
while ($c !== "") {
$c = $i->nextChar();
}
}],
'Intl code points' => ["intl", function(string $text) {
$i = (function($text) {
$i = \IntlBreakIterator::createCodePointInstance();
@@ -61,6 +54,11 @@ $tests = [
$p = $i->nextCode();
}
}],
'Code point iterator' => ["", function(string $text) {
$c = null;
$i = new UTF8($text);
foreach ($i as $c);
}],
];
if (!file_exists(__DIR__."/docs/")) {

7
tests/cases/Encoding/TestUTF8.php

@@ -66,15 +66,12 @@ class TestUTF8 extends \PHPUnit\Framework\TestCase {
*/
public function testIterateThroughAString(string $input, array $exp) {
$out = [];
$exp = array_map(function ($v) {
return \IntlChar::chr($v);
}, $exp);
$s = new UTF8($input);
$a = 0;
$this->assertTrue(true); // prevent risky test of empty string
foreach ($s as $index => $c) {
foreach ($s as $index => $p) {
$this->assertSame($a, $index, "Character key at index $a reported incorrectly");
$this->assertSame(bin2hex($exp[$a]), bin2hex($c), "Character at index $a decoded incorrectly");
$this->assertSame($exp[$a], $p, "Character at index $a decoded incorrectly");
$a++;
}
}

Loading…
Cancel
Save