Browse Source

Tweaks and cleanup

labels
J. King 6 years ago
parent
commit
3698aa8d8d
  1. 1
      RoboFile.php
  2. 49
      lib/UTF8.php
  3. 2
      perf/perf.php

1
RoboFile.php

@ -98,6 +98,5 @@ class RoboFile extends \Robo\Tasks {
$execpath = realpath(self::BASE."vendor-bin/phpunit/vendor/phpunit/phpunit/phpunit");
$confpath = realpath(self::BASE_TEST."phpunit.xml");
return $this->taskExec($executor)->arg($execpath)->option("-c", $confpath)->args(array_merge($set,$args))->run();
}
}

49
lib/UTF8.php

@ -24,28 +24,30 @@ abstract class UTF8 {
start:
// get the byte at the specified position
$b = @$string[$pos];
if ($b < "\x80") {
if (ord($b) < 0x80) {
// if the byte is an ASCII character or end of input, simply return it
$next = $pos + 1;
return $b;
} else {
$errMode = $errMode ?? self::$errMode;
// otherwise determine the numeric code point of the character, as well as the position of the next character
$p = self::ord($string, $pos, $next, self::M_REPLACE);
if (is_int($p)) {
// if the character is valid, return its serialization
// we do a round trip (bytes > code point > bytes) to normalize overlong sequences
return self::chr($p);
} elseif ($errMode==self::M_REPLACE) {
// if the byte is invalid and we're supposed to replace, return a replacement character
return self::$replacementChar;
} elseif ($errMode==self::M_SKIP) {
// if the character is invalid and we're supposed to skip invalid characters, advance the position and start over
$pos = $next;
goto start;
} else {
// if the byte is invalid and we're supposed to halt, halt
throw new \Exception;
$errMode = $errMode ?? self::$errMode;
if ($errMode==self::M_REPLACE) {
// if the byte is invalid and we're supposed to replace, return a replacement character
return self::$replacementChar;
} elseif ($errMode==self::M_SKIP) {
// if the character is invalid and we're supposed to skip invalid characters, advance the position and start over
$pos = $next;
goto start;
} else {
// if the byte is invalid and we're supposed to halt, halt
throw new \Exception;
}
}
}
}
@ -189,9 +191,9 @@ abstract class UTF8 {
if ($b=="") {
$next = $pos + 1;
return null;
} elseif ($b < "\x80") {
} elseif (($b = ord($b)) < 0x80) {
$next = $pos + 1;
return ord($b);
return $b;
}
$point = 0;
$seen = 0;
@ -279,25 +281,4 @@ abstract class UTF8 {
}
return $bytes;
}
/**
 * Returns the expected byte length of a UTF-8 character starting with byte $b
 *
 * If the byte is not the start of a UTF-8 sequence, 0 is returned
 *
 * @param int|string $b The lead byte, compared here against integer ranges (presumably
 *                      its ordinal value -- confirm against callers), or the empty string
 *                      to signal end of input
 * @return int 1 to 4 for a recognized lead byte or end of input; 0 for an invalid byte
 */
protected static function l($b): int {
    // End of input is tested first: an empty string also satisfies the "< 0x80"
    // comparison further down, so checking it after the ASCII test would make this
    // branch unreachable. We pretend it is a valid single-byte character.
    if ($b === "") {
        return 1;
    }
    // two-byte character (0xC0 and 0xC1 are excluded: they could only start overlong sequences)
    if ($b >= 0xC2 && $b <= 0xDF) {
        return 2;
    }
    // three-byte character
    if ($b >= 0xE0 && $b <= 0xEF) {
        return 3;
    }
    // four-byte character (lead bytes above 0xF4 would encode beyond U+10FFFF)
    if ($b >= 0xF0 && $b <= 0xF4) {
        return 4;
    }
    // ASCII byte: one-byte character
    if ($b < 0x80) {
        return 1;
    }
    // anything else (continuation bytes, 0xC0, 0xC1, 0xF5 and above) cannot start a sequence
    return 0;
}
}

2
perf/perf.php

@ -73,7 +73,7 @@ foreach($files as $fName => $file) {
$t[$a] = microtime(true) - $s;
}
$t = array_sum($t) / sizeof($t);
echo number_format($t, 3)." ($n characters)\n";
echo number_format($t, 3)."\n";
}
}
}

Loading…
Cancel
Save