Browse Source

Tests for replacement encoding; readme correction

multi-byte
J. King 3 years ago
parent
commit
808b4128dd
  1. 13
      README.md
  2. 6
      lib/Encoding/Replacement.php
  3. 201
      tests/cases/Encoding/TestReplacement.php
  4. 1
      tests/phpunit.xml

13
README.md

@ -2,18 +2,7 @@
While PHP's [internationalization extension][PHP_INTL] offers excellent and extensive functionality for dealing with human languages, character encodings, and various related things, it is not always available. Moreover, its character decoder does not yield the same results as [WHATWG's Encoding standard][ENCODING], making it unsuitable for implementing parsers for URLs or HTML. The more widely used [multi-byte string extension][PHP_MBSTRING] not only suffers from the same problems, but is also very slow.
Included here is a partial suite of WHATWG-compatible seekable string decoders which are reasonably performant while requiring no external dependencies or PHP extensions. At present it includes the following encodings:
* UTF-8
* UTF-16
* gb18030
* GBK
* Big5
* EUC-KR
* all single-byte encodings
* x-user-defined
Where applicable, code point encoders are also included. In time it will be extended to cover the entire suite of WHATWG character encodings, and may also provide other character-centric internationalization functionality.
Included here is a complete suite of WHATWG-compatible seekable string decoders which are reasonably performant while requiring no external dependencies or PHP extensions. Where applicable, code point encoders are also included. In time it may also provide other character-centric internationalization functionality.
[PHP_INTL]: https://php.net/manual/en/book.intl.php
[PHP_MBSTRING]: https://php.net/manual/en/book.mbstring.php

6
lib/Encoding/Replacement.php

@ -51,7 +51,7 @@ class Replacement implements Encoding {
public function nextCode() {
if (!$this->eof()) {
try {
return $this->peekCode();
return $this->peekCode()[0];
} finally {
$this->done = true;
$this->posErr = 1;
@ -80,7 +80,7 @@ class Replacement implements Encoding {
}
public function peekChar(int $num = 1): string {
if (!$this->eof()) {
if (!$this->eof() && $num > 0) {
if ($this->fatal) {
throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
}
@ -90,7 +90,7 @@ class Replacement implements Encoding {
}
public function peekCode(int $num = 1): array {
if (!$this->eof()) {
if (!$this->eof() && $num > 0) {
if ($this->fatal) {
throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
}

201
tests/cases/Encoding/TestReplacement.php

@ -0,0 +1,201 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\Replacement;
use MensBeam\Intl\Encoding\DecoderException;
/**
 * Test suite for the "replacement" decoder.
 *
 * The cases below expect every non-empty input to decode to exactly one
 * U+FFFD code point, and the empty string to decode to nothing. Most tests
 * simply delegate to the implementation of the same name in the parent class.
 */
class TestReplacement extends \MensBeam\Intl\Test\DecoderTest {
    /** Name of the decoder class under test; presumably consumed by the inherited tests (not visible here) */
    protected $testedClass = Replacement::class;

    /**
     * Supplies named data sets, each pairing an input with the code points expected from decoding it.
     *
     * NOTE(review): the inputs look like space-separated hexadecimal bytes;
     * presumably the parent tests turn them into raw bytes -- confirm.
     */
    public function provideStrings() {
        // any non-empty input is expected to yield this single code point
        $replacement = [0xFFFD];
        return [
            // control samples
            'empty string'       => ["", []],
            'Arbitrary string 1' => ["20", $replacement],
            'Arbitrary string 2' => ["64 8B 20 00 FF A5", $replacement],
        ];
    }

    /**
     * Runs the inherited code-point decoding test against each provided string.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * Runs the inherited character-string decoding test against each provided string.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * Runs the inherited step-back test against each provided string.
     *
     * The unusual capitalisation of the method name is kept because it is
     * the name of the parent method being called.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * Placeholder which always passes and does not call the parent class.
     *
     * NOTE(review): presumably this replaces an inherited seek test which is
     * not meaningful for a decoder yielding at most one character -- confirm.
     *
     * @coversNothing
     */
    public function testSeekThroughAString() {
        $this->assertTrue(true);
    }

    /**
     * Checks that seeking forward reaches the end of the input and that a further seek changes nothing.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     * @covers MensBeam\Intl\Encoding\Replacement::eof
     */
    public function testTraversePastTheEndOfAString() {
        $decoder = new Replacement("a");
        $this->assertFalse($decoder->eof());
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());
        // the first pass arrives at the end; the second must leave the state untouched
        for ($pass = 1; $pass <= 2; $pass++) {
            $decoder->seek(1);
            $this->assertTrue($decoder->eof());
            $this->assertSame(1, $decoder->posChar());
            $this->assertSame(1, $decoder->posByte());
        }
    }

    /**
     * Checks that peeking at characters returns at most one replacement
     * character without moving the decoder, and returns an empty string when
     * zero or fewer characters are requested.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCharacters() {
        $decoder = new Replacement("A");
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());
        // requesting far more characters than exist still yields only one
        $this->assertSame("\u{FFFD}", $decoder->peekChar(2112));
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());
        foreach ([0, -2112] as $count) {
            $this->assertSame("", $decoder->peekChar($count));
        }
    }

    /**
     * Checks that peeking at code points returns at most one U+FFFD without
     * moving the decoder, and returns an empty array when zero or fewer code
     * points are requested.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCodePoints() {
        $decoder = new Replacement("A");
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());
        // requesting far more code points than exist still yields only one
        $this->assertSame([0xFFFD], $decoder->peekCode(2112));
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());
        foreach ([0, -2112] as $count) {
            $this->assertSame([], $decoder->peekCode($count));
        }
    }

    /**
     * Runs the inherited string-length test against each provided string.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::lenChar
     * @covers MensBeam\Intl\Encoding\Replacement::lenByte
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * Exercises a decoder constructed with true as its second argument.
     *
     * Every peek and read is expected to throw a DecoderException. Peeking
     * must leave the position at zero, while reading must advance to
     * character 1 and byte 6 (the full length of the input). The posErr
     * property is expected to be 0 before the first read and 1 afterwards,
     * including after a rewind.
     *
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testReplacementModes() {
        $decoder = new Replacement("VVVVVV", true);
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());

        // peeking at a code point: throws, and nothing moves
        try {
            $outcome = $decoder->peekCode();
        } catch (\Exception $caught) {
            $outcome = $caught;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $outcome);
        }
        $this->assertSame(0, $decoder->posErr);
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());

        // reading a code point: throws, and the whole input is consumed
        try {
            $outcome = $decoder->nextCode();
        } catch (\Exception $caught) {
            $outcome = $caught;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $outcome);
        }
        $this->assertSame(1, $decoder->posErr);
        $this->assertSame(1, $decoder->posChar());
        $this->assertSame(6, $decoder->posByte());

        // rewinding returns to the start of the input
        $decoder->rewind();
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());

        // peeking at a character: throws, and nothing moves
        try {
            $outcome = $decoder->peekChar();
        } catch (\Exception $caught) {
            $outcome = $caught;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $outcome);
        }
        $this->assertSame(1, $decoder->posErr);
        $this->assertSame(0, $decoder->posChar());
        $this->assertSame(0, $decoder->posByte());

        // reading a character: throws, and the whole input is consumed
        try {
            $outcome = $decoder->nextChar();
        } catch (\Exception $caught) {
            $outcome = $caught;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $outcome);
        }
        $this->assertSame(1, $decoder->posErr);
        $this->assertSame(1, $decoder->posChar());
        $this->assertSame(6, $decoder->posByte());
    }

    /**
     * Runs the inherited iteration test against each provided string.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::chars
     * @covers MensBeam\Intl\Encoding\Replacement::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * Runs the inherited surrogate-tolerant iteration test against each provided string.
     *
     * The provider supplies only two values per data set, so $relaxedExp
     * takes its default of null here.
     *
     * NOTE(review): the implicitly nullable "array $relaxedExp = null" is
     * deprecated as of PHP 8.4; switching to an explicit nullable type needs
     * PHP 7.1+ and must stay compatible with the parent signature.
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * Runs the inherited random-data seek test unchanged, without recording coverage.
     *
     * @coversNothing
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }
}

1
tests/phpunit.xml

@ -28,6 +28,7 @@
<file>cases/Encoding/TestEUCKR.php</file>
<file>cases/Encoding/TestShiftJIS.php</file>
<file>cases/Encoding/TestISO2022JP.php</file>
<file>cases/Encoding/TestReplacement.php</file>
<file>cases/TestEncoding.php</file>
</testsuite>
</testsuites>

Loading…
Cancel
Save