Browse Source

Partial tests for ASCII spanning

span
J. King 3 years ago
parent
commit
81186973f1
  1. 8
      lib/Encoding/UTF16.php
  2. 9
      tests/cases/Encoding/TestBig5.php
  3. 9
      tests/cases/Encoding/TestEUCJP.php
  4. 9
      tests/cases/Encoding/TestEUCKR.php
  5. 11
      tests/cases/Encoding/TestGB18030.php
  6. 7
      tests/cases/Encoding/TestISO2022JP.php
  7. 7
      tests/cases/Encoding/TestReplacement.php
  8. 9
      tests/cases/Encoding/TestShiftJIS.php
  9. 9
      tests/cases/Encoding/TestSingleByte.php
  10. 2
      tests/cases/Encoding/TestUTF16BE.php
  11. 9
      tests/cases/Encoding/TestUTF16LE.php
  12. 29
      tests/cases/Encoding/TestUTF8.php
  13. 7
      tests/cases/Encoding/TestXUserDefined.php
  14. 22
      tests/lib/DecoderTest.php

8
lib/Encoding/UTF16.php

@ -90,9 +90,9 @@ abstract class UTF16 extends AbstractEncoding {
while (true) {
$c1 = @$this->string[$this->posByte];
$c2 = @$this->string[$this->posByte + 1];
$b = ord(self::BE ? $c1 : $c2);
$b = ord(static::BE ? $c1 : $c2);
if (!$b) {
$c = self::BE ? $c2 : $c1;
$c = static::BE ? $c2 : $c1;
$b = ord($c);
if ($b < 0x80 && strpos($mask, $c) !== false && $c1 !== "" && $c2 !== "") {
$out .= $c;
@ -114,9 +114,9 @@ abstract class UTF16 extends AbstractEncoding {
while (true) {
$c1 = @$this->string[$this->posByte];
$c2 = @$this->string[$this->posByte + 1];
$b = ord(self::BE ? $c1 : $c2);
$b = ord(static::BE ? $c1 : $c2);
if (!$b) {
$c = self::BE ? $c2 : $c1;
$c = static::BE ? $c2 : $c1;
$b = ord($c);
if ($b < 0x80 && strpos($mask, $c) === false && $c1 !== "" && $c2 !== "") {
$out .= $c;

9
tests/cases/Encoding/TestBig5.php

@ -27,6 +27,8 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $seekOffsets = [0, 1, 3, 5, 5, 7, 7, 9];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A D7AA A4F4 30 39";
public function provideCodePoints() {
return [
@ -195,6 +197,13 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\Big5::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* @group optional
*/

9
tests/cases/Encoding/TestEUCJP.php

@ -27,6 +27,8 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $seekOffsets = [0, 1, 3, 5, 7, 8, 10, 13];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A 8EDB 8FB0EF 30 39";
public function provideCodePoints() {
return [
@ -206,6 +208,13 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\EUCJP::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* @group optional
*/

9
tests/cases/Encoding/TestEUCKR.php

@ -27,6 +27,8 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $seekOffsets = [0, 1, 3, 4, 6, 8, 10, 11];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A E2A9 A5C1 30 39";
public function provideCodePoints() {
return [
@ -185,6 +187,13 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\EUCKR::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* @group optional
*/

11
tests/cases/Encoding/TestGB18030.php

@ -23,11 +23,13 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
Char 6 U+FFFE (4 bytes) Offset 19
End of string at char 7, offset 23
*/
protected $seekString = "7A 81 30 84 34 CB AE 94 32 BE 34 84 30 81 30 E3 32 9A 33 84 31 A4 38";
protected $seekString = "7A 81308434 CBAE 9432BE34 84308130 E3329A33 8431A438";
protected $seekCodes = [0x007A, 0x00A2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE];
protected $seekOffsets = [0, 1, 5, 7, 11, 15, 19, 23];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A 81308434 CBAE 30 39";
public function tearDown(): void {
$this->testedClass = GB18030::class;
@ -274,6 +276,13 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\GB18030::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* @group optional
*/

7
tests/cases/Encoding/TestISO2022JP.php

@ -243,6 +243,13 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Placeholder for the ASCII-span extraction test of this encoding.
*
* The test is reported as incomplete instead of delegating to the
* inherited implementation, so asciiSpan() is not exercised here yet.
*
* @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpan
*/
public function testExtractAsciiSpans() {
$this->markTestIncomplete();
}
/**
* @group optional
*/

7
tests/cases/Encoding/TestReplacement.php

@ -198,4 +198,11 @@ class TestReplacement extends \MensBeam\Intl\Test\DecoderTest {
public function testSeekBackOverRandomData() {
// Delegates unchanged to the inherited implementation and passes its result through.
return parent::testSeekBackOverRandomData();
}
/**
* Placeholder for the ASCII-span extraction test of this encoding.
*
* The test is reported as incomplete instead of delegating to the
* inherited implementation, so asciiSpan() is not exercised here yet.
*
* @covers MensBeam\Intl\Encoding\Replacement::asciiSpan
*/
public function testExtractAsciiSpans() {
$this->markTestIncomplete();
}
}

9
tests/cases/Encoding/TestShiftJIS.php

@ -27,6 +27,8 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $seekOffsets = [0, 1, 2, 4, 6, 7, 8, 10];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A D6 82E6 30 39";
public function provideCodePoints() {
return [
@ -198,6 +200,13 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\ShiftJIS::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* @group optional
*/

9
tests/cases/Encoding/TestSingleByte.php

@ -81,6 +81,8 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $seekOffsets = [0, 1, 2, 3, 4, 5, 6, 7];
/* This string is supposed to contain an invalid character sequence sandwiched between two null characters; this is different for each single-byte encoding (and many do not have invalid characters) */
protected $brokenChar = "";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A 80 FF 30 39";
/**
* @dataProvider provideCodePoints
@ -234,6 +236,13 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for the single-byte encodings.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
public function provideClasses() {
foreach (self::$classes as $name => $class) {
yield $name => [$class];

2
tests/cases/Encoding/TestUTF16BE.php

@ -25,6 +25,8 @@ class TestUTF16BE extends TestUTF16LE {
protected $seekOffsets = [0, 2, 4, 6, 10, 12, 16, 18];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "0000 DC00 0000";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "0041 005A 6C34 D834DD1E 0030 0039";
protected $lowerA = "\x00a";
public function provideStrings() {

9
tests/cases/Encoding/TestUTF16LE.php

@ -25,6 +25,8 @@ class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest {
protected $seekOffsets = [0, 2, 4, 6, 10, 12, 16, 18];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "0000 00DC 0000";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "4100 5A00 346C 34D81EDD 3000 3900";
protected $lowerA = "a\x00";
/**
@ -133,6 +135,13 @@ class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\UTF16::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
public function provideStrings() {
return [
// control samples

29
tests/cases/Encoding/TestUTF8.php

@ -30,28 +30,6 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A E6B0B4 F09D849E 30 39";
/**
 * Builds a string holding every byte value exactly once, in ascending order.
 *
 * @return string The 256 bytes 0x00 through 0xFF
 */
protected function allBytes(): string {
    // Map each integer in the byte range to its single-byte string and join them
    return implode("", array_map("chr", range(0x00, 0xFF)));
}
/**
 * Checks asciiSpan() against the fixture held in $spanString.
 *
 * According to the fixture's own comment, the string is "A", "Z", two
 * non-ASCII characters, then "0" and "9".
 */
public function testExtractAsciiSpans() {
    $decoderClass = $this->testedClass;
    $decoder = new $decoderClass($this->prepString($this->spanString));
    $everyByte = $this->allBytes();

    // A mask that lacks the upcoming character yields an empty span
    $this->assertSame("", $decoder->asciiSpan("az"));
    // The second argument presumably caps the span length, so only "A" is taken
    $this->assertSame("A", $decoder->asciiSpan("AZ", 1));
    // Without the extra argument the span runs up to the first non-ASCII character
    $this->assertSame("Z", $decoder->asciiSpan("AZ"));

    // Even a mask of every byte returns nothing while a non-ASCII character is next
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();

    // Past both non-ASCII characters, the trailing ASCII digits are returned
    $this->assertSame("09", $decoder->asciiSpan($everyByte));
}
public function provideCodePoints() {
return [
'U+007A (HTML)' => [false, 0x7A, "7A"],
@ -248,4 +226,11 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
public function testSeekBackOverRandomData() {
// Delegates unchanged to the inherited implementation and passes its result through.
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\UTF8::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
}

7
tests/cases/Encoding/TestXUserDefined.php

@ -183,4 +183,11 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest {
public function testSeekBackOverRandomData() {
// Delegates unchanged to the inherited implementation and passes its result through.
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span extraction test for this encoding.
*
* The body only delegates to the parent implementation; the override
* presumably exists so that the coverage annotation below can name the
* class under test.
*
* @covers MensBeam\Intl\Encoding\XUserDefined::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
}

22
tests/lib/DecoderTest.php

@ -354,7 +354,29 @@ abstract class DecoderTest extends \PHPUnit\Framework\TestCase {
$this->assertSame(sizeof($exp), $a);
}
/**
 * Checks asciiSpan() against the fixture held in $spanString.
 *
 * According to the fixture comments in the concrete test classes, the
 * string is "A", "Z", two non-ASCII characters, then "0" and "9".
 */
public function testExtractAsciiSpans() {
    $decoderClass = $this->testedClass;
    $decoder = new $decoderClass($this->prepString($this->spanString));
    $everyByte = $this->allBytes();

    // A mask that lacks the upcoming character yields an empty span
    $this->assertSame("", $decoder->asciiSpan("az"));
    // The second argument presumably caps the span length, so only "A" is taken
    $this->assertSame("A", $decoder->asciiSpan("AZ", 1));
    // Without the extra argument the span runs up to the first non-ASCII character
    $this->assertSame("Z", $decoder->asciiSpan("AZ"));

    // Even a mask of every byte returns nothing while a non-ASCII character is next
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();

    // Past both non-ASCII characters, the trailing ASCII digits are returned
    $this->assertSame("09", $decoder->asciiSpan($everyByte));
}
/**
 * Converts a hexadecimal fixture string into the raw bytes it denotes.
 *
 * Space characters are removed before decoding, so fixtures may group
 * their digits freely (e.g. "41 5A E6B0B4").
 *
 * @param string $str Hexadecimal digits, optionally separated by spaces
 * @return string The decoded byte string; an empty input yields an empty string
 * @throws \InvalidArgumentException If the input, once spaces are removed, is not an even-length run of hexadecimal digits
 */
protected function prepString(string $str): string {
    $hex = str_replace(" ", "", $str);
    // hex2bin() returns false and emits a warning on malformed input, which
    // conflicts with the declared string return type; reject such fixtures
    // up front so that a typo fails loudly at its source
    if (!preg_match('/^(?:[0-9A-Fa-f]{2})*$/D', $hex)) {
        throw new \InvalidArgumentException("Malformed hexadecimal test string: \"$str\"");
    }
    return hex2bin($hex);
}
/**
 * Builds a string holding every byte value exactly once, in ascending order.
 *
 * @return string The 256 bytes 0x00 through 0xFF
 */
protected function allBytes(): string {
    // Map each integer in the byte range to its single-byte string and join them
    return implode("", array_map("chr", range(0x00, 0xFF)));
}
}

Loading…
Cancel
Save