From 2e2ed16788b1e88c016a6c3842ce3ea00d325887 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Thu, 25 Mar 2021 15:02:32 -0400 Subject: [PATCH] Tests for ISO-2022-JP spanning --- lib/Encoding/ISO2022JP.php | 58 +++++++++++++++++--------- tests/cases/Encoding/TestISO2022JP.php | 6 ++- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/lib/Encoding/ISO2022JP.php b/lib/Encoding/ISO2022JP.php index b887fee..a195489 100644 --- a/lib/Encoding/ISO2022JP.php +++ b/lib/Encoding/ISO2022JP.php @@ -220,7 +220,7 @@ class ISO2022JP extends AbstractEncoding implements ModalCoder, Decoder { } $effectiveMask = preg_replace($exc, "", $mask); // if the byte after the mode switch is a wanted one, consume it and go back to the start - if (strspn(@$this->string[$this->posByte + 3], $effectiveMask, $this->posByte)) { + if (strspn(@$this->string[$this->posByte + 3], $effectiveMask)) { $out .= $this->nextChar(); if (--$left) { goto Process; @@ -231,28 +231,48 @@ class ISO2022JP extends AbstractEncoding implements ModalCoder, Decoder { } public function asciiSpanNot(string $mask, int $length = null): string { - if ($this->mode === self::ASCII_STATE) { - $mask .= "\x0E\x0F\x1B"; - } elseif ($this->mode === self::ROMAN_STATE) { - $mask .= "\x0E\x0F\x1B\x5C\x7E"; - } else { - // in other modes ASCII characters are never returned - return ""; - } $mask .= self::HIGH_BYTES; - if ($length !== null) { - $len = strcspn($this->string, $mask, $this->posByte, $length); + $out = ""; + $left = ($length === null) ? 
-1 : $length; + Process: + if ($this->mode === self::KATAKANA_STATE || $this->mode === self::LEAD_BYTE_STATE) { + // these modes will always return an empty span } else { - $len = strcspn($this->string, $mask, $this->posByte); + if ($this->mode === self::ASCII_STATE) { + $effectiveMask = $mask."\x0E\x0F\x1B"; + } elseif ($this->mode === self::ROMAN_STATE) { + $effectiveMask = $mask."\x0E\x0F\x1B\x5C\x7E"; + } + if ($length !== null) { + $len = strcspn($this->string, $effectiveMask, $this->posByte, $left); + } else { + $len = strcspn($this->string, $effectiveMask, $this->posByte); + } + if ($len) { + $out .= substr($this->string, $this->posByte, $len); + $this->posByte += $len; + $this->posChar += $len; + $left -= $len; + } } - if ($len) { - $out = substr($this->string, $this->posByte, $len); - $this->posByte += $len; - $this->posChar += $len; - return $out; - } else { - return ""; + // check if the current position has a mode change to ASCII or Roman modes and is followed by a desired character + if ($left && @$this->string[$this->posByte] === "\x1B") { + if (substr($this->string, $this->posByte + 1, 2) === "\x28\x42") { + $effectiveMask = $mask."\x0E\x0F\x1B"; + } elseif (substr($this->string, $this->posByte + 1, 2) === "\x28\x4A") { + $effectiveMask = $mask."\x0E\x0F\x1B\x5C\x7E"; + } else { + return $out; + } + // if the byte after the mode switch is a wanted one, consume it and go back to the start + if (strcspn(@$this->string[$this->posByte + 3], $effectiveMask)) { + $out .= $this->nextChar(); + if (--$left) { + goto Process; + } + } } + return $out; } protected function stateSave(): array { diff --git a/tests/cases/Encoding/TestISO2022JP.php b/tests/cases/Encoding/TestISO2022JP.php index 8be3d07..89d859b 100644 --- a/tests/cases/Encoding/TestISO2022JP.php +++ b/tests/cases/Encoding/TestISO2022JP.php @@ -31,6 +31,8 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { protected $seekOffsets = [0, 1, 5, 6, 11, 13, 15, 19]; /* This string 
contains an invalid character sequence sandwiched between two null characters */ protected $brokenChar = "00 FF 00"; + /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */ + protected $spanString = "1B284A 41 5A 1B2849 5C 5F 1B2842 30 39"; public function provideCodePoints() { return [ @@ -247,14 +249,14 @@ class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest { * @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpan */ public function testExtractAsciiSpans() { - $this->markTestIncomplete(); + parent::testExtractAsciiSpans(); } /** * @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpanNot */ public function testExtractNegativeAsciiSpans() { - $this->markTestIncomplete(); + parent::testExtractNegativeAsciiSpans(); } /**