From 6fd50f06813feb879c411c9a1b094f77676aa592 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 27 Jul 2018 19:11:33 -0400 Subject: [PATCH] Ensure char and byte position never goes beyond the end of the string --- lib/UTF8String.php | 8 +++++--- tests/cases/TestCodec.php | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/UTF8String.php b/lib/UTF8String.php index 04faa81..4a2e2b6 100644 --- a/lib/UTF8String.php +++ b/lib/UTF8String.php @@ -31,7 +31,9 @@ class UTF8String { public function nextChr(): string { // get the byte at the current position $b = @$this->string[$this->posByte]; - if (ord($b) < 0x80) { + if ($b === "") { + return ""; + } elseif (ord($b) < 0x80) { // if the byte is an ASCII character or end of input, simply return it $this->posChar++; $this->posByte++; @@ -50,16 +52,16 @@ class UTF8String { // this function effectively implements https://encoding.spec.whatwg.org/#utf-8-decoder // though it differs from a slavish implementation because it operates on only a single // character rather than a whole stream - $this->posChar++; // optimization for ASCII characters $b = @$this->string[$this->posByte]; if ($b=="") { - $this->posByte++; return false; } elseif (($b = ord($b)) < 0x80) { + $this->posChar++; $this->posByte++; return $b; } + $this->posChar++; $point = 0; $seen = 0; $needed = 1; diff --git a/tests/cases/TestCodec.php b/tests/cases/TestCodec.php index fe74dfb..f78db68 100644 --- a/tests/cases/TestCodec.php +++ b/tests/cases/TestCodec.php @@ -107,6 +107,36 @@ class TestConf extends \PHPUnit\Framework\TestCase { $this->assertSame(0, $s->posChr()); $this->assertSame(0, $s->posByte()); } + + /** + * @covers \MensBeam\UTF8\UTF8String::posChr + * @covers \MensBeam\UTF8\UTF8String::posByte + */ + public function testTraversePastTheEndOfAString() { + $s = new UTF8String("a"); + $this->assertSame(0, $s->posChr()); + $this->assertSame(0, $s->posByte()); + + $this->assertSame("a", $s->nextChr()); + $this->assertSame(1, $s->posChr()); + $this->assertSame(1, $s->posByte()); + + $this->assertSame("", $s->nextChr()); + $this->assertSame(1, $s->posChr()); + $this->assertSame(1, $s->posByte()); + + $s = new UTF8String("a"); + $this->assertSame(0, $s->posChr()); + $this->assertSame(0, $s->posByte()); + + $this->assertSame(ord("a"), $s->nextOrd()); + $this->assertSame(1, $s->posChr()); + $this->assertSame(1, $s->posByte()); + + $this->assertSame(false, $s->nextOrd()); + $this->assertSame(1, $s->posChr()); + $this->assertSame(1, $s->posByte()); + } public function provideStrings() { return [