Browse Source

Prototype strspn equivalent

span
J. King 3 years ago
parent
commit
bf81571ce4
  1. 20
      lib/Encoding/AbstractEncoding.php
  2. 22
      lib/Encoding/Decoder.php
  3. 34
      lib/Encoding/ISO2022JP.php
  4. 8
      lib/Encoding/Replacement.php
  5. 48
      lib/Encoding/UTF16.php

20
lib/Encoding/AbstractEncoding.php

@ -11,6 +11,8 @@ abstract class AbstractEncoding implements Decoder {
protected const MODE_REPLACE = 1;
protected const MODE_FATAL = 2;
/**
 * Every byte value from 0x80 through 0xFF, in ascending order (i.e. all non-ASCII bytes)
 *
 * Appended to strcspn() masks so that a "not matching" span halts at the first non-ASCII byte.
 */
protected const HIGH_BYTES = "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
/** @var string $string The string being decoded */
protected $string;
/** @var int $posByte The current byte position in the string */
@ -162,6 +164,24 @@ abstract class AbstractEncoding implements Decoder {
}
}
/**
 * Consumes the run of bytes at the current position which all appear in $mask
 *
 * Bytes 0x80-0xFF are removed from the mask first, so only ASCII bytes can ever match.
 * The byte position and the character position both advance by the number of bytes consumed.
 *
 * @param string $mask The set of ASCII characters to match
 * @param int|null $length The maximum number of bytes to examine; null (the default) applies no limit
 * @return string The consumed characters, possibly the empty string
 */
public function asciiSpan(string $mask, int $length = null): string {
    // PCRE has no "g" modifier (preg_replace is always global); supplying one
    // makes the call fail with a warning and return null, destroying the mask
    $mask = preg_replace('/[\x80-\xFF]/', "", $mask) ?? "";
    // before PHP 8 an explicit null length is coerced to 0 instead of meaning
    // "to the end of the string", so the argument is omitted when not supplied
    $len = ($length === null)
        ? strspn($this->string, $mask, $this->posByte)
        : strspn($this->string, $mask, $this->posByte, $length);
    if (!$len) {
        // nothing matched; also avoids substr() returning false at the end of the string before PHP 8
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
/**
 * Consumes the run of ASCII bytes at the current position, none of which appear in $mask
 *
 * Every byte from 0x80 to 0xFF is added to the mask, so the span always halts at the first non-ASCII byte.
 * The byte position and the character position both advance by the number of bytes consumed.
 *
 * @param string $mask The set of ASCII characters which end the span
 * @param int|null $length The maximum number of bytes to examine; null (the default) applies no limit
 * @return string The consumed characters, possibly the empty string
 */
public function asciiSpanNot(string $mask, int $length = null): string {
    $mask .= self::HIGH_BYTES;
    // before PHP 8 an explicit null length is coerced to 0 instead of meaning
    // "to the end of the string", so the argument is omitted when not supplied
    $len = ($length === null)
        ? strcspn($this->string, $mask, $this->posByte)
        : strcspn($this->string, $mask, $this->posByte, $length);
    if (!$len) {
        // nothing consumed; also avoids substr() returning false at the end of the string before PHP 8
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
/** Returns a copy of the decoder's state to keep in memory */
protected function stateSave(): array {
$out = ['errCount' => sizeof($this->errStack)];

22
lib/Encoding/Decoder.php

@ -80,4 +80,26 @@ interface Decoder {
/** Generates an iterator which steps through each code point in the string */
public function codes(): \Generator;
/** Fast-forwards through a span of ASCII characters matching the supplied mask, returning any consumed characters
 *
 * The mask must consist only of ASCII characters.
 *
 * Note that if the empty string is returned, this does not necessarily signal the end of the string
 *
 * @param string $mask The set of ASCII characters to match
 * @param int $length The maximum number of characters to advance by; null (the default) is intended to mean no limit
 * @return string The characters which were consumed, possibly the empty string
 */
public function asciiSpan(string $mask, int $length = null): string;
/** Fast-forwards through a span of ASCII characters not matching the supplied mask, returning any consumed characters
 *
 * The mask must consist only of ASCII characters.
 *
 * Note that if the empty string is returned, this does not necessarily signal the end of the string
 *
 * @param string $mask The set of ASCII characters to not match
 * @param int $length The maximum number of characters to advance by; null (the default) is intended to mean no limit
 * @return string The characters which were consumed, possibly the empty string
 */
public function asciiSpanNot(string $mask, int $length = null): string;
}

34
lib/Encoding/ISO2022JP.php

@ -184,6 +184,40 @@ class ISO2022JP extends AbstractEncoding implements ModalCoder, Decoder {
return $distance;
}
/**
 * Consumes the run of bytes at the current position which all appear in $mask
 *
 * Only the ASCII and Roman modes can produce a span; in any other mode the empty string
 * is returned and nothing is consumed. Bytes 0x0E, 0x0F, 0x1B (ESC) and 0x80-0xFF are
 * always removed from the mask; in Roman mode 0x5C and 0x7E are removed as well
 * (presumably because those bytes do not decode to themselves there -- confirm against the decoder).
 *
 * @param string $mask The set of ASCII characters to match
 * @param int|null $length The maximum number of bytes to examine; null (the default) applies no limit
 * @return string The consumed characters, possibly the empty string
 */
public function asciiSpan(string $mask, int $length = null): string {
    // PCRE has no "g" modifier (preg_replace is always global); supplying one
    // makes the call fail with a warning and return null, destroying the mask
    if ($this->mode === self::ASCII_STATE) {
        $exc = '/[\x0E\x0F\x1B\x80-\xFF]/';
    } elseif ($this->mode === self::ROMAN_STATE) {
        $exc = '/[\x0E\x0F\x1B\x5C\x7E\x80-\xFF]/';
    } else {
        // in other modes ASCII characters are never returned
        return "";
    }
    $mask = preg_replace($exc, "", $mask) ?? "";
    // before PHP 8 an explicit null length is coerced to 0 instead of meaning
    // "to the end of the string", so the argument is omitted when not supplied
    $len = ($length === null)
        ? strspn($this->string, $mask, $this->posByte)
        : strspn($this->string, $mask, $this->posByte, $length);
    if (!$len) {
        // nothing matched; also avoids substr() returning false at the end of the string before PHP 8
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
/**
 * Consumes the run of ASCII bytes at the current position, none of which appear in $mask
 *
 * Only the ASCII and Roman modes can produce a span; in any other mode the empty string
 * is returned and nothing is consumed. Bytes 0x0E, 0x0F, 0x1B (ESC) and 0x80-0xFF always
 * end the span; in Roman mode 0x5C and 0x7E end it as well.
 *
 * @param string $mask The set of ASCII characters which end the span
 * @param int|null $length The maximum number of bytes to examine; null (the default) applies no limit
 * @return string The consumed characters, possibly the empty string
 */
public function asciiSpanNot(string $mask, int $length = null): string {
    if ($this->mode === self::ASCII_STATE) {
        $mask .= "\x0E\x0F\x1B";
    } elseif ($this->mode === self::ROMAN_STATE) {
        $mask .= "\x0E\x0F\x1B\x5C\x7E";
    } else {
        // in other modes ASCII characters are never returned
        return "";
    }
    $mask .= self::HIGH_BYTES;
    // before PHP 8 an explicit null length is coerced to 0 instead of meaning
    // "to the end of the string", so the argument is omitted when not supplied
    $len = ($length === null)
        ? strcspn($this->string, $mask, $this->posByte)
        : strcspn($this->string, $mask, $this->posByte, $length);
    if (!$len) {
        // nothing consumed; also avoids substr() returning false at the end of the string before PHP 8
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
protected function stateSave(): array {
$out = parent::stateSave();
$out['modeCount'] = sizeof($this->modeStack);

8
lib/Encoding/Replacement.php

@ -122,4 +122,12 @@ class Replacement implements Decoder {
yield 0 => $this->nextCode();
}
}
/**
 * Always yields the empty string: this decoder never reports a span of ASCII characters
 *
 * Neither argument is consulted and no decoder state is modified.
 */
public function asciiSpan(string $mask, int $length = null): string {
    // nothing is ever consumed
    return '';
}
/**
 * Always yields the empty string: this decoder never reports a span of ASCII characters
 *
 * Neither argument is consulted and no decoder state is modified.
 */
public function asciiSpanNot(string $mask, int $length = null): string {
    // nothing is ever consumed
    return '';
}
}

48
lib/Encoding/UTF16.php

@ -84,6 +84,54 @@ abstract class UTF16 extends AbstractEncoding {
}
}
/**
 * Consumes the run of ASCII characters at the current position which all appear in $mask
 *
 * Each character occupies two bytes; it qualifies only when its high byte is zero, its low
 * byte is below 0x80, and the low byte appears in the mask. The byte position advances by two
 * and the character position by one for every character consumed.
 *
 * @param string $mask The set of ASCII characters to match
 * @param int|null $length The maximum number of characters to consume; null (the default) applies no limit, and values below zero are treated as zero
 * @return string The consumed characters as single bytes, possibly the empty string
 */
public function asciiSpan(string $mask, int $length = null): string {
    // UTF-16 has no ASCII characters, so we must do things the hard way
    $out = "";
    // -1 never reaches zero by decrementing, and so stands for "no limit"
    $left = ($length === null) ? -1 : max(0, $length);
    // requiring two available bytes replaces error-suppressed out-of-range reads,
    // and guarantees strpos() is never handed an empty needle
    while ($left !== 0 && isset($this->string[$this->posByte + 1])) {
        $c1 = $this->string[$this->posByte];
        $c2 = $this->string[$this->posByte + 1];
        // late static binding, so that the byte order of the concrete class is honoured
        // NOTE(review): assumes BE is declared or overridden per concrete encoding -- confirm
        $high = static::BE ? $c1 : $c2;
        $low = static::BE ? $c2 : $c1;
        if ($high !== "\x00" || ord($low) >= 0x80 || strpos($mask, $low) === false) {
            break;
        }
        $out .= $low;
        $this->posByte += 2;
        $this->posChar++;
        $left--;
    }
    return $out;
}
/**
 * Consumes the run of ASCII characters at the current position, none of which appear in $mask
 *
 * Each character occupies two bytes; it qualifies only when its high byte is zero, its low
 * byte is below 0x80, and the low byte does not appear in the mask. The byte position advances
 * by two and the character position by one for every character consumed.
 *
 * @param string $mask The set of ASCII characters which end the span
 * @param int|null $length The maximum number of characters to consume; null (the default) applies no limit, and values below zero are treated as zero
 * @return string The consumed characters as single bytes, possibly the empty string
 */
public function asciiSpanNot(string $mask, int $length = null): string {
    // UTF-16 has no ASCII characters, so the input must be examined two bytes at a time
    $out = "";
    // -1 never reaches zero by decrementing, and so stands for "no limit"
    $left = ($length === null) ? -1 : max(0, $length);
    // requiring two available bytes replaces error-suppressed out-of-range reads,
    // and guarantees strpos() is never handed an empty needle
    while ($left !== 0 && isset($this->string[$this->posByte + 1])) {
        $c1 = $this->string[$this->posByte];
        $c2 = $this->string[$this->posByte + 1];
        // late static binding, so that the byte order of the concrete class is honoured
        // NOTE(review): assumes BE is declared or overridden per concrete encoding -- confirm
        $high = static::BE ? $c1 : $c2;
        $low = static::BE ? $c2 : $c1;
        if ($high !== "\x00" || ord($low) >= 0x80 || strpos($mask, $low) !== false) {
            break;
        }
        $out .= $low;
        $this->posByte += 2;
        $this->posChar++;
        $left--;
    }
    return $out;
}
/** Implements backward seeking $distance characters */
protected function seekBack(int $distance): int {
if ($this->dirtyEOF && $distance) {

Loading…
Cancel
Save