Tweaks and cleanup

2018-04-25 14:54:44 -04:00 · 2018-04-25 14:54:44 -04:00 · 3698aa8d8d
commit 3698aa8d8d
parent 84d103269f
3 changed files with 16 additions and 36 deletions
--- a/RoboFile.php
+++ b/RoboFile.php
@@ -98,6 +98,5 @@ class RoboFile extends \Robo\Tasks {
        $execpath = realpath(self::BASE."vendor-bin/phpunit/vendor/phpunit/phpunit/phpunit");
        $confpath = realpath(self::BASE_TEST."phpunit.xml");
        return $this->taskExec($executor)->arg($execpath)->option("-c", $confpath)->args(array_merge($set,$args))->run();
-
    }
 }
--- a/lib/UTF8.php
+++ b/lib/UTF8.php
@@ -24,19 +24,20 @@ abstract class UTF8 {
        start:
        // get the byte at the specified position
        $b = @$string[$pos];
-        if ($b < "\x80") {
+        if (ord($b) < 0x80) {
            // if the byte is an ASCII character or end of input, simply return it
            $next = $pos + 1;
            return $b;
        } else {
-            $errMode = $errMode ?? self::$errMode;
            // otherwise determine the numeric code point of the character, as well as the position of the next character
            $p = self::ord($string, $pos, $next, self::M_REPLACE);
            if (is_int($p)) {
                // if the character is valid, return its serialization
                // we do a round trip (bytes > code point > bytes) to normalize overlong sequences
                return self::chr($p);
-            } elseif ($errMode==self::M_REPLACE) {
+            } else {
+                $errMode = $errMode ?? self::$errMode;
+                if ($errMode==self::M_REPLACE) {
                    // if the byte is invalid and we're supposed to replace, return a replacement character
                    return self::$replacementChar;
                } elseif ($errMode==self::M_SKIP) {
@@ -49,6 +50,7 @@ abstract class UTF8 {
                }
            }
        }
+    }

    /** Starting from byte offset $pos, advance $num characters through $string and return the byte offset of the found character 
     * 
@@ -189,9 +191,9 @@ abstract class UTF8 {
        if ($b=="") {
            $next = $pos + 1;
            return null;
-        } elseif ($b < "\x80") {
+        } elseif (($b = ord($b)) < 0x80) {
            $next = $pos + 1;
-            return ord($b);
+            return $b;
        }
        $point = 0;
        $seen = 0;
@@ -279,25 +281,4 @@ abstract class UTF8 {
        }
        return $bytes;
    }
-
-    /** 
-     * Returns the expected byte length of a UTF-8 character starting with byte $b 
-     * 
-     * If the byte is not the start of a UTF-8 sequence, 0 is returned
-     */
-    protected static function l($b): int {
-        if ($b >= 0xC2 && $b <= 0xDF) { // two-byte character
-            return 2;
-        } elseif ($b >= 0xE0 && $b <= 0xEF) { // three-byte character
-            return 3;
-        } elseif ($b >= 0xF0 && $b <= 0xF4) { // four-byte character
-            return 4;
-        } elseif ($b < 0x80) { // ASCII byte: one-byte character
-            return 1;
-        } elseif ($b==="") { // end of input: pretend it's a valid single-byte character
-            return 1;
-        } else { // invalid byte
-            return 0;
-        }
-    }
 }
--- a/perf/perf.php
+++ b/perf/perf.php
@@ -73,7 +73,7 @@ foreach($files as $fName => $file) {
                $t[$a] = microtime(true) - $s;
            }
            $t = array_sum($t) / sizeof($t);
-            echo number_format($t, 3)." ($n characters)\n";
+            echo number_format($t, 3)."\n";
        }
    }
 }