Browse Source

Implement string length

labels
J. King 6 years ago
parent
commit
72291b5f0d
  1. 17
      lib/UTF8.php
  2. 19
      tests/cases/TestInstance.php

17
lib/UTF8.php

@ -10,6 +10,7 @@ class UTF8 {
protected $string;
protected $posByte = 0;
protected $posChar = 0;
protected $length = null;
public function __construct(string $string) {
@ -165,6 +166,22 @@ class UTF8 {
return $out;
}
/** Returns the length of the string, counted in code points
 *
 * The result is cached in $this->length, so the string is only walked the
 * first time this is called. That first call reads characters with
 * nextChr() until it yields an empty string, then puts the character and
 * byte positions back to what they were beforehand.
 */
public function len(): int {
    // Cached from a previous call: nothing to compute
    if ($this->length !== null) {
        return $this->length;
    }
    // Remember where we are so the scan below can be undone
    $savedChar = $this->posChar;
    $savedByte = $this->posByte;
    // Consume characters until nextChr() reports the end with an empty string
    do {
        $chr = $this->nextChr();
    } while ($chr !== "");
    // The character position reached by the scan is the total length
    $this->length = $this->posChar;
    // Restore the caller-visible position
    $this->posChar = $savedChar;
    $this->posByte = $savedByte;
    return $this->length;
}
/** Synchronize to the byte offset of the start of the nearest character at or before byte offset $pos */
protected function sync(int $pos) {
$b = ord(@$this->string[$pos]);

19
tests/cases/TestInstance.php

@ -17,6 +17,7 @@ class TestInstance extends \PHPUnit\Framework\TestCase {
*/
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
$s = new UTF8($input);
$out = [];
while (($p = $s->nextOrd()) !== false) {
$out[] = $p ?? 0xFFFD;
}
@ -29,6 +30,7 @@ class TestInstance extends \PHPUnit\Framework\TestCase {
* @covers \MensBeam\UTF8\UTF8::nextChr
*/
public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
$out = [];
$exp = array_map(function ($v) {
return \IntlChar::chr($v);
}, $exp);
@ -46,6 +48,7 @@ class TestInstance extends \PHPUnit\Framework\TestCase {
public function testSTepBackThroughAString(string $input, array $points) {
$s = new UTF8($input);
$a = 0;
$this->assertTrue(true); // prevent risky test of empty string
while (($p1 = $s->nextOrd() ?? 0xFFFD) !== false) {
$this->assertSame(0, $s->seek(-1));
$p2 = $s->nextOrd() ?? 0xFFFD;
@ -219,10 +222,26 @@ class TestInstance extends \PHPUnit\Framework\TestCase {
$this->assertSame(5, $s->posChr());
$this->assertSame(13, $s->posByte());
}
/**
* Checks that len() reports one unit per expected code point and leaves the
* character and byte positions exactly as they were before the call.
*
* @dataProvider provideStrings
* @covers \MensBeam\UTF8\UTF8::len
*/
public function testGetStringLength(string $input, array $points) {
    $decoder = new UTF8($input);
    // Seek before measuring; presumably this moves off the initial position
    // where the input allows, so restoration is meaningfully exercised
    $decoder->seek(1);
    $charBefore = $decoder->posChr();
    $byteBefore = $decoder->posByte();
    // The expected length is the number of code points in the data set
    $expectedLength = count($points);
    $this->assertSame($expectedLength, $decoder->len());
    // Measuring the length must not disturb either position
    $this->assertSame($charBefore, $decoder->posChr());
    $this->assertSame($byteBefore, $decoder->posByte());
}
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["\x61\x62\x63\x31\x32\x33", [97, 98, 99, 49, 50, 51]],
'multibyte control' => ["\xE5\x8F\xA4\xE6\xB1\xA0\xE3\x82\x84\xE8\x9B\x99\xE9\xA3\x9B\xE3\x81\xB3\xE8\xBE\xBC\xE3\x82\x80\xE6\xB0\xB4\xE3\x81\xAE\xE9\x9F\xB3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
'mixed sample' => ["\x7A\xC2\xA2\xE6\xB0\xB4\xF0\x9D\x84\x9E\xEF\xA3\xBF\xF4\x8F\xBF\xBD\xEF\xBF\xBE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],

Loading…
Cancel
Save