Browse Source

Implement x-user-defined decoder

Also further refactored tests to better account for one-way encodings
span
J. King 6 years ago
parent
commit
d5327a3b83
  1. 91
      lib/Encoding/XUserDefined.php
  2. 2
      tests/cases/Encoding/TestGB18030.php
  3. 8
      tests/cases/Encoding/TestSingleByte.php
  4. 16
      tests/cases/Encoding/TestUTF16LE.php
  5. 2
      tests/cases/Encoding/TestUTF8.php
  6. 132
      tests/cases/Encoding/TestXUserDefined.php
  7. 23
      tests/lib/CoderDecoderTest.php
  8. 14
      tests/lib/DecoderTest.php
  9. 1
      tests/phpunit.xml

91
lib/Encoding/XUserDefined.php

@ -0,0 +1,91 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Decoder for the x-user-defined encoding
 *
 * Every byte decodes to exactly one code point: bytes below 0x80 map to the
 * code point of the same value, and bytes 0x80-0xFF map to 0xF700 plus the
 * byte value (U+F780 through U+F7FF). Because each character is one byte
 * wide, the character position kept in $posChar is also the byte position.
 */
class XUserDefined implements Encoding {
    use GenericEncoding;

    /** Retrieve the next character in the string, in UTF-8 encoding
     *
     * Returns the empty string if the end of the string has been reached, in
     * which case the position is not advanced. This decoder never emits a
     * replacement character, as every byte value has a mapping.
     */
    public function nextChar(): string {
        // read the byte at the current position; the null-coalescing operator
        // yields "" past the end of the string without raising a warning which
        // would otherwise have to be silenced with the @ operator
        $b = $this->string[$this->posChar] ?? "";
        if ($b === "") {
            // end of input
            return "";
        }
        $this->posChar++;
        $p = ord($b);
        if ($p < 0x80) {
            // ASCII bytes are already valid single-byte UTF-8
            return $b;
        }
        // upper-half bytes map into the U+F780-U+F7FF range
        return UTF8::encode(0xF700 + $p);
    }

    /** Decodes the next character from the string and returns its code point number
     *
     * If the end of the string has been reached, false is returned and the
     * position is not advanced
     *
     * @return int|false
     */
    public function nextCode() {
        // see nextChar() for why the null-coalescing operator is used here
        $b = $this->string[$this->posChar] ?? "";
        if ($b === "") {
            // end of input
            return false;
        }
        $this->posChar++;
        $p = ord($b);
        if ($p < 0x80) {
            // ASCII bytes map to the code point of the same value
            return $p;
        }
        // upper-half bytes map into the U+F780-U+F7FF range
        return 0xF700 + $p;
    }

    /** Advance $distance characters through the string
     *
     * If $distance is negative, the operation will be performed in reverse
     *
     * If the end (or beginning) of the string was reached before the end of
     * the operation, the remaining number of requested characters is returned
     *
     * Since every character occupies exactly one byte, the new position is
     * computed directly rather than by decoding each intervening character
     */
    public function seek(int $distance): int {
        if ($distance > 0) {
            // move forward, but no further than the end of the string
            $step = min($distance, max(0, $this->lenByte - $this->posChar));
            $this->posChar += $step;
            return $distance - $step;
        }
        if ($distance < 0) {
            // move backward, but no further than the start of the string
            $distance = abs($distance);
            $step = min($distance, max(0, $this->posChar));
            $this->posChar -= $step;
            return $distance - $step;
        }
        return 0;
    }

    /** Returns the current byte position of the decoder
     *
     * This is identical to the character position, as each character is one byte
     */
    public function posByte(): int {
        return $this->posChar;
    }

    /** Returns the length of the string in code points
     *
     * Each byte decodes to one code point, so this is simply the byte length
     * of the string; no processing of the string is required
     */
    public function len(): int {
        return $this->lenByte;
    }
}

2
tests/cases/Encoding/TestGB18030.php

@ -10,7 +10,7 @@ use MensBeam\Intl\Encoding\GBK;
use MensBeam\Intl\Encoding\GB18030;
use MensBeam\Intl\Encoding\EncoderException;
class TestGB18030 extends \MensBeam\Intl\Test\EncodingTest {
class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = GB18030::class;
/*
Char 0 U+007A (1 byte) Offset 0

8
tests/cases/Encoding/TestSingleByte.php

@ -9,7 +9,7 @@ namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\SingleByteEncoding;
use MensBeam\Intl\Encoding\EncoderException;
class TestSingleByte extends \MensBeam\Intl\Test\EncodingTest {
class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
// maps taken from https://github.com/web-platform-tests/wpt/blob/d6c29bef8d4bcdfe4f689defca73360b07647d71/encoding/single-byte-decoder.html
// ISO-8859-8 was duplicated for ISO-8859-8-I
protected static $maps = [
@ -115,11 +115,13 @@ class TestSingleByte extends \MensBeam\Intl\Test\EncodingTest {
}
/**
* @dataProvider provideStrings
* @coversNothing
*/
public function testSTepBackThroughAString(string $input = "", array $exp = []) {
public function testSTepBackThroughAString(string $input, array $exp, string $class = SingleByteEncoding::class) {
// this test has no meaning for single-byte encodings
$this->assertTrue(true);
$this->testedClass = $class;
return parent::testSTepBackThroughAString($input, $exp);
}
/**

16
tests/cases/Encoding/TestUTF16LE.php

@ -9,7 +9,7 @@ namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF16LE;
use MensBeam\Intl\Encoding\UTF16BE;
class TestUTF16LE extends \MensBeam\Intl\Test\EncodingTest {
class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest {
protected $testedClass = UTF16LE::class;
/*
Char 0 U+007A (2 byte) Offset 0
@ -28,15 +28,6 @@ class TestUTF16LE extends \MensBeam\Intl\Test\EncodingTest {
protected $brokenChar = "0000 00DC 0000";
protected $lowerA = "a\x00";
/**
* @dataProvider provideCodePoints
* @coversNothing
*/
public function testEncodeCodePoints(bool $fatal, $input, $exp) {
// UTF-16 has no encoder
$this->assertTrue(true);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::__construct
@ -126,11 +117,6 @@ class TestUTF16LE extends \MensBeam\Intl\Test\EncodingTest {
return parent::testIterateThroughAString($input, $exp);
}
public function provideCodePoints() {
// UTF-16 has no encoder
return [[true, 0, ""]];
}
public function provideStrings() {
return [
// control samples

2
tests/cases/Encoding/TestUTF8.php

@ -9,7 +9,7 @@ namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF8;
use MensBeam\Intl\Encoding\EncoderException;
class TestUTF8 extends \MensBeam\Intl\Test\EncodingTest {
class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = UTF8::class;
/*
Char 0 U+007A (1 byte) Offset 0

132
tests/cases/Encoding/TestXUserDefined.php

@ -0,0 +1,132 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\XUserDefined;
/** Test case for the x-user-defined decoder
 *
 * Most methods simply forward to the generic decoder tests in the parent
 * class so that coverage annotations can be attached per method.
 */
class TestXUserDefined extends \MensBeam\Intl\Test\DecoderTest {
    protected $testedClass = XUserDefined::class;
    /* Seeking in x-user-defined is uniform (one byte per character), so a plain ASCII digit string serves as the sample */
    protected $seekString = "30 31 32 33 34 35 36";
    protected $seekCodes = [0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36];
    protected $seekOffsets = [0, 1, 2, 3, 4, 5, 6, 7];
    /* Normally an invalid sequence between two null characters; left empty because every byte is valid in x-user-defined */
    protected $brokenChar = "";
    protected $lowerA = "a";

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::__construct
     * @covers MensBeam\Intl\Encoding\XUserDefined::nextCode
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::__construct
     * @covers MensBeam\Intl\Encoding\XUserDefined::nextChar
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @coversNothing
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        // stepping back is not meaningful for x-user-defined; the generic test is
        // nevertheless run, without counting toward coverage
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::seek
     * @covers MensBeam\Intl\Encoding\XUserDefined::posChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::posByte
     * @covers MensBeam\Intl\Encoding\XUserDefined::rewind
     */
    public function testSeekThroughAString() {
        return parent::testSeekThroughAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::posChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::posByte
     */
    public function testTraversePastTheEndOfAString() {
        return parent::testTraversePastTheEndOfAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::peekChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testPeekAtCharacters() {
        return parent::testPeekAtCharacters();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::peekCode
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testPeekAtCodePoints() {
        return parent::testPeekAtCodePoints();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::len
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * @coversNothing
     */
    public function testReplacementModes() {
        // placeholder: passes unconditionally instead of running the generic test
        $this->assertTrue(true);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::rewind
     * @covers MensBeam\Intl\Encoding\XUserDefined::chars
     * @covers MensBeam\Intl\Encoding\XUserDefined::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /** Supplies input byte strings (space-separated uppercase hexadecimal) with their expected code points
     *
     * Three sets are produced: the empty string, all bytes 0x00-0x7F (expected
     * to decode to themselves), and all bytes 0x80-0xFF (expected to decode to
     * U+F780 through U+F7FF)
     */
    public function provideStrings() {
        $asciiHex = [];
        $asciiCodes = [];
        $privateHex = [];
        $privateCodes = [];
        // build both halves of the byte range in a single pass
        foreach (range(0, 0x7F) as $offset) {
            $asciiHex[] = sprintf("%02X", $offset);
            $asciiCodes[] = $offset;
            $privateHex[] = sprintf("%02X", 0x80 + $offset);
            $privateCodes[] = 0xF780 + $offset;
        }
        return [
            'empty string'      => ["", []],
            'ASCI bytes'        => [implode(" ", $asciiHex), $asciiCodes],
            'private-use bytes' => [implode(" ", $privateHex), $privateCodes],
        ];
    }
}

23
tests/lib/CoderDecoderTest.php

@ -0,0 +1,23 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Test;
use MensBeam\Intl\Encoding\EncoderException;
/** Base test class for encodings which provide an encoder in addition to a decoder
 *
 * Adds the generic encoder test on top of the decoder tests inherited from DecoderTest
 */
abstract class CoderDecoderTest extends DecoderTest {
    /** Encodes $input with the tested class and checks the bytes produced
     *
     * @param bool $fatal Passed through as the second argument of the tested class' encode() method
     * @param mixed $input The value to encode, passed through as the first argument of encode()
     * @param \Throwable|string $exp Either a throwable whose class and code are expected to be raised, or the expected output as hexadecimal digits (spaces and letter case are ignored)
     */
    public function testEncodeCodePoints(bool $fatal, $input, $exp) {
        $expectsFailure = $exp instanceof \Throwable;
        if (!$expectsFailure) {
            // normalize the expected bytes to the form produced by bin2hex()
            $exp = strtolower(str_replace(" ", "", $exp));
        } else {
            // the expectation must be registered before the encoder is invoked
            $this->expectException(get_class($exp));
            $this->expectExceptionCode($exp->getCode());
        }
        $encoder = $this->testedClass;
        $encoded = $encoder::encode($input, $fatal);
        $this->assertSame($exp, bin2hex($encoded));
    }
}
}

14
tests/lib/EncodingTest.php → tests/lib/DecoderTest.php

@ -6,21 +6,9 @@
declare(strict_types=1);
namespace MensBeam\Intl\Test;
use MensBeam\Intl\Encoding\EncoderException;
use MensBeam\Intl\Encoding\DecoderException;
abstract class EncodingTest extends \PHPUnit\Framework\TestCase {
public function testEncodeCodePoints(bool $fatal, $input, $exp) {
$class = $this->testedClass;
if ($exp instanceof \Throwable) {
$this->expectException(get_class($exp));
$this->expectExceptionCode($exp->getCode());
} else {
$exp = strtolower(str_replace(" ", "", $exp));
}
$out = $class::encode($input, $fatal);
$this->assertSame($exp, bin2hex($out));
}
abstract class DecoderTest extends \PHPUnit\Framework\TestCase {
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
$class = $this->testedClass;

1
tests/phpunit.xml

@ -22,6 +22,7 @@
<file>cases/Encoding/TestUTF16LE.php</file>
<file>cases/Encoding/TestUTF16BE.php</file>
<file>cases/Encoding/TestSingleByte.php</file>
<file>cases/Encoding/TestXUserDefined.php</file>
<file>cases/Encoding/TestGB18030.php</file>
</testsuite>
</testsuites>

Loading…
Cancel
Save