From 81186973f173198827b0423b583e560f3b497420 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Wed, 24 Mar 2021 08:59:59 -0400 Subject: [PATCH] Partial tests for ASCII spanning --- lib/Encoding/UTF16.php | 8 +++---- tests/cases/Encoding/TestBig5.php | 9 +++++++ tests/cases/Encoding/TestEUCJP.php | 9 +++++++ tests/cases/Encoding/TestEUCKR.php | 9 +++++++ tests/cases/Encoding/TestGB18030.php | 11 ++++++++- tests/cases/Encoding/TestISO2022JP.php | 7 ++++++ tests/cases/Encoding/TestReplacement.php | 7 ++++++ tests/cases/Encoding/TestShiftJIS.php | 9 +++++++ tests/cases/Encoding/TestSingleByte.php | 9 +++++++ tests/cases/Encoding/TestUTF16BE.php | 2 ++ tests/cases/Encoding/TestUTF16LE.php | 9 +++++++ tests/cases/Encoding/TestUTF8.php | 29 ++++++----------------- tests/cases/Encoding/TestXUserDefined.php | 7 ++++++ tests/lib/DecoderTest.php | 22 +++++++++++++++++ 14 files changed, 120 insertions(+), 27 deletions(-) diff --git a/lib/Encoding/UTF16.php b/lib/Encoding/UTF16.php index e6876a5..6a60d20 100644 --- a/lib/Encoding/UTF16.php +++ b/lib/Encoding/UTF16.php @@ -90,9 +90,9 @@ abstract class UTF16 extends AbstractEncoding { while (true) { $c1 = @$this->string[$this->posByte]; $c2 = @$this->string[$this->posByte + 1]; - $b = ord(self::BE ? $c1 : $c2); + $b = ord(static::BE ? $c1 : $c2); if (!$b) { - $c = self::BE ? $c2 : $c1; + $c = static::BE ? $c2 : $c1; $b = ord($c); if ($b < 0x80 && strpos($mask, $c) !== false && $c1 !== "" && $c2 !== "") { $out .= $c; @@ -114,9 +114,9 @@ abstract class UTF16 extends AbstractEncoding { while (true) { $c1 = @$this->string[$this->posByte]; $c2 = @$this->string[$this->posByte + 1]; - $b = ord(self::BE ? $c1 : $c2); + $b = ord(static::BE ? $c1 : $c2); if (!$b) { - $c = self::BE ? $c2 : $c1; + $c = static::BE ? 
$c2 : $c1; $b = ord($c); if ($b < 0x80 && strpos($mask, $c) === false && $c1 !== "" && $c2 !== "") { $out .= $c; diff --git a/tests/cases/Encoding/TestBig5.php b/tests/cases/Encoding/TestBig5.php index e5a8268..cab1492 100644 --- a/tests/cases/Encoding/TestBig5.php +++ b/tests/cases/Encoding/TestBig5.php @@ -27,6 +27,8 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 3, 5, 5, 7, 7, 9]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A D7AA A4F4 30 39"; public function provideCodePoints() { return [ @@ -195,6 +197,13 @@ class TestBig5 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\Big5::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestEUCJP.php b/tests/cases/Encoding/TestEUCJP.php index 96c8d9d..ea340d2 100644 --- a/tests/cases/Encoding/TestEUCJP.php +++ b/tests/cases/Encoding/TestEUCJP.php @@ -27,6 +27,8 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 3, 5, 7, 8, 10, 13]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A 8EDB 8FB0EF 30 39"; public function provideCodePoints() { return [ @@ -206,6 +208,13 @@ class TestEUCJP extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers 
MensBeam\Intl\Encoding\EUCJP::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestEUCKR.php b/tests/cases/Encoding/TestEUCKR.php index addffe6..2e05f38 100644 --- a/tests/cases/Encoding/TestEUCKR.php +++ b/tests/cases/Encoding/TestEUCKR.php @@ -27,6 +27,8 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 3, 4, 6, 8, 10, 11]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A E2A9 A5C1 30 39"; public function provideCodePoints() { return [ @@ -185,6 +187,13 @@ class TestEUCKR extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\EUCKR::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestGB18030.php b/tests/cases/Encoding/TestGB18030.php index e8588aa..89c6bd8 100644 --- a/tests/cases/Encoding/TestGB18030.php +++ b/tests/cases/Encoding/TestGB18030.php @@ -23,11 +23,13 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { Char 6 U+FFFE (4 bytes) Offset 19 End of string at char 7, offset 23 */ - protected $seekString = "7A 81 30 84 34 CB AE 94 32 BE 34 84 30 81 30 E3 32 9A 33 84 31 A4 38"; + protected $seekString = "7A 81308434 CBAE 9432BE34 84308130 E3329A33 8431A438"; protected $seekCodes = [0x007A, 0x00A2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE]; protected $seekOffsets = [0, 1, 5, 7, 11, 15, 19, 23]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* 
This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A 81308434 CBAE 30 39"; public function tearDown(): void { $this->testedClass = GB18030::class; @@ -274,6 +276,13 @@ class TestGB18030 extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\GB18030::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestISO2022JP.php b/tests/cases/Encoding/TestISO2022JP.php index 13d985b..399825e 100644 --- a/tests/cases/Encoding/TestISO2022JP.php +++ b/tests/cases/Encoding/TestISO2022JP.php @@ -243,6 +243,13 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpan + */ + public function testExtractAsciiSpans() { + $this->markTestIncomplete(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestReplacement.php b/tests/cases/Encoding/TestReplacement.php index 76fe3fd..d4fe70c 100644 --- a/tests/cases/Encoding/TestReplacement.php +++ b/tests/cases/Encoding/TestReplacement.php @@ -198,4 +198,11 @@ class TestReplacement extends \MensBeam\Intl\Test\DecoderTest { public function testSeekBackOverRandomData() { return parent::testSeekBackOverRandomData(); } + + /** + * @covers MensBeam\Intl\Encoding\Replacement::asciiSpan + */ + public function testExtractAsciiSpans() { + $this->markTestIncomplete(); + } } diff --git a/tests/cases/Encoding/TestShiftJIS.php b/tests/cases/Encoding/TestShiftJIS.php index e0e09c9..0b56f07 100644 --- a/tests/cases/Encoding/TestShiftJIS.php +++ b/tests/cases/Encoding/TestShiftJIS.php @@ -27,6 +27,8 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 2, 4, 6, 7, 
8, 10]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A D6 82E6 30 39"; public function provideCodePoints() { return [ @@ -198,6 +200,13 @@ class TestShiftJIS extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\ShiftJIS::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + /** * @group optional */ diff --git a/tests/cases/Encoding/TestSingleByte.php b/tests/cases/Encoding/TestSingleByte.php index 5ca46e2..a3917d0 100644 --- a/tests/cases/Encoding/TestSingleByte.php +++ b/tests/cases/Encoding/TestSingleByte.php @@ -81,6 +81,8 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 2, 3, 4, 5, 6, 7]; /* This string is supposed to contain an invalid character sequence sandwiched between two null characters; this is different for each single-byte encoding (and many do not have invalid characters) */ protected $brokenChar = ""; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "41 5A 80 FF 30 39"; /** * @dataProvider provideCodePoints @@ -234,6 +236,13 @@ class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\SingleByteEncoding::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + public function provideClasses() { foreach (self::$classes as $name => $class) { yield $name => [$class]; diff --git a/tests/cases/Encoding/TestUTF16BE.php 
b/tests/cases/Encoding/TestUTF16BE.php index c77ab04..6b7d117 100644 --- a/tests/cases/Encoding/TestUTF16BE.php +++ b/tests/cases/Encoding/TestUTF16BE.php @@ -25,6 +25,8 @@ class TestUTF16BE extends TestUTF16LE { protected $seekOffsets = [0, 2, 4, 6, 10, 12, 16, 18]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "0000 DC00 0000"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "0041 005A 6C34 D834DD1E 0030 0039"; protected $lowerA = "\x00a"; public function provideStrings() { diff --git a/tests/cases/Encoding/TestUTF16LE.php b/tests/cases/Encoding/TestUTF16LE.php index 35bf317..6cf4bbf 100644 --- a/tests/cases/Encoding/TestUTF16LE.php +++ b/tests/cases/Encoding/TestUTF16LE.php @@ -25,6 +25,8 @@ class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest { protected $seekOffsets = [0, 2, 4, 6, 10, 12, 16, 18]; /* This string contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "0000 00DC 0000"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "4100 5A00 346C 34D81EDD 3000 3900"; protected $lowerA = "a\x00"; /** @@ -133,6 +135,13 @@ class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest { return parent::testSeekBackOverRandomData(); } + /** + * @covers MensBeam\Intl\Encoding\UTF16::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } + public function provideStrings() { return [ // control samples diff --git a/tests/cases/Encoding/TestUTF8.php b/tests/cases/Encoding/TestUTF8.php index 66cf196..1af1839 100644 --- a/tests/cases/Encoding/TestUTF8.php +++ b/tests/cases/Encoding/TestUTF8.php @@ -30,28 +30,6 @@ class TestUTF8 extends 
\MensBeam\Intl\Test\CoderDecoderTest { /* This string conatins the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ protected $spanString = "41 5A E6B0B4 F09D849E 30 39"; - protected function allBytes(): string { - $out = ""; - for ($a = 0x00; $a <= 0xFF; $a++) { - $out .= chr($a); - } - return $out; - } - - public function testExtractAsciiSpans() { - $allBytes = $this->allBytes(); - $class = $this->testedClass; - $d = new $class($this->prepString($this->spanString)); - $this->assertSame("", $d->asciiSpan("az")); - $this->assertSame("A", $d->asciiSpan("AZ", 1)); - $this->assertSame("Z", $d->asciiSpan("AZ")); - $this->assertSame("", $d->asciiSpan($allBytes)); - $d->nextChar(); - $this->assertSame("", $d->asciiSpan($allBytes)); - $d->nextChar(); - $this->assertSame("09", $d->asciiSpan($allBytes)); - } - public function provideCodePoints() { return [ 'U+007A (HTML)' => [false, 0x7A, "7A"], @@ -248,4 +226,11 @@ class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest { public function testSeekBackOverRandomData() { return parent::testSeekBackOverRandomData(); } + + /** + * @covers MensBeam\Intl\Encoding\UTF8::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } } diff --git a/tests/cases/Encoding/TestXUserDefined.php b/tests/cases/Encoding/TestXUserDefined.php index 3f7a689..7f196fd 100644 --- a/tests/cases/Encoding/TestXUserDefined.php +++ b/tests/cases/Encoding/TestXUserDefined.php @@ -183,4 +183,11 @@ class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest { public function testSeekBackOverRandomData() { return parent::testSeekBackOverRandomData(); } + + /** + * @covers MensBeam\Intl\Encoding\XUserDefined::asciiSpan + */ + public function testExtractAsciiSpans() { + parent::testExtractAsciiSpans(); + } } diff --git a/tests/lib/DecoderTest.php b/tests/lib/DecoderTest.php index 6f2b50c..451edc8 100644 --- 
a/tests/lib/DecoderTest.php +++ b/tests/lib/DecoderTest.php @@ -354,7 +354,29 @@ abstract class DecoderTest extends \PHPUnit\Framework\TestCase { $this->assertSame(sizeof($exp), $a); } + public function testExtractAsciiSpans() { + $allBytes = $this->allBytes(); + $class = $this->testedClass; + $d = new $class($this->prepString($this->spanString)); + $this->assertSame("", $d->asciiSpan("az")); + $this->assertSame("A", $d->asciiSpan("AZ", 1)); + $this->assertSame("Z", $d->asciiSpan("AZ")); + $this->assertSame("", $d->asciiSpan($allBytes)); + $d->nextChar(); + $this->assertSame("", $d->asciiSpan($allBytes)); + $d->nextChar(); + $this->assertSame("09", $d->asciiSpan($allBytes)); + } + protected function prepString(string $str): string { return hex2bin(str_replace(" ", "", $str)); } + + protected function allBytes(): string { + $out = ""; + for ($a = 0x00; $a <= 0xFF; $a++) { + $out .= chr($a); + } + return $out; + } }