diff --git a/lib/Encoding/ISO2022JP.php b/lib/Encoding/ISO2022JP.php index ad38a74..793c455 100644 --- a/lib/Encoding/ISO2022JP.php +++ b/lib/Encoding/ISO2022JP.php @@ -185,28 +185,48 @@ class ISO2022JP extends AbstractEncoding implements ModalCoder, Decoder { } public function asciiSpan(string $mask, int $length = null): string { - if ($this->mode === self::ASCII_STATE) { - $exc = '/[\x0E\x0F\x1B\x80-\xFF]/s'; - } elseif ($this->mode === self::ROMAN_STATE) { - $exc = '/[\x0E\x0F\x1B\x5C\x7E\x80-\xFF]/s'; - } else { - // in other modes ASCII characters are never returned - return ""; - } - $mask = preg_replace($exc, "", $mask); - if ($length !== null) { - $len = strspn($this->string, $mask, $this->posByte, $length); + $out = ""; + Process: + if ($this->mode === self::KATAKANA_STATE || $this->mode === self::LEAD_BYTE_STATE) { + // these modes will always return an empty span } else { - $len = strspn($this->string, $mask, $this->posByte); + if ($this->mode === self::ASCII_STATE) { + $exc = '/[\x0E\x0F\x1B\x80-\xFF]/s'; + } elseif ($this->mode === self::ROMAN_STATE) { + $exc = '/[\x0E\x0F\x1B\x5C\x7E\x80-\xFF]/s'; + } else { + // in other modes ASCII characters are never returned + return ""; + } + $effectiveMask = preg_replace($exc, "", $mask); + if ($length !== null) { + $len = strspn($this->string, $effectiveMask, $this->posByte, $length); + } else { + $len = strspn($this->string, $effectiveMask, $this->posByte); + } + if ($len) { + $out .= substr($this->string, $this->posByte, $len); + $this->posByte += $len; + $this->posChar += $len; + } } - if ($len) { - $out = substr($this->string, $this->posByte, $len); - $this->posByte += $len; - $this->posChar += $len; - return $out; - } else { - return ""; + // check if the current position has a mode change to ASCII or Roman modes and is followed by a desired character + if (@$this->string[$this->posByte] === "\x1B") { + if (substr($this->string, $this->posByte + 1, 2) === "\x28\x42") { + $exc = '/[\x0E\x0F\x1B\x80-\xFF]/s'; + } elseif (substr($this->string, $this->posByte + 1, 2) === "\x28\x4A") { + $exc = '/[\x0E\x0F\x1B\x5C\x7E\x80-\xFF]/s'; + } else { + return $out; + } + $effectiveMask = preg_replace($exc, "", $mask); + // if the byte after the mode switch is a wanted one, consume it and go back to the start + if (strspn(@$this->string[$this->posByte + 3], $effectiveMask, $this->posByte)) { + $out .= $this->nextChar(); + goto Process; + } } + return $out; } public function asciiSpanNot(string $mask, int $length = null): string {