diff --git a/lib/Grammar.php b/lib/Grammar.php
index f4167c0..97185b7 100644
--- a/lib/Grammar.php
+++ b/lib/Grammar.php
@@ -203,7 +203,7 @@ class Grammar {
                     $value = preg_replace_callback('/\\\x\{([0-9A-Fa-f]+)\}/', function($matches) {
                         return "\x{" . (((int)base_convert($matches[1], 16, 10) > 0x10ffff) ? '10ffff' : $matches[1]) . "}";
                     }, $value);
-                    $p['match'] = "/$value/u";
+                    $p['match'] = "/$value/Su";
 
                     $modified = true;
                 break;
diff --git a/lib/Highlight.php b/lib/Highlight.php
index 82e666f..a1abe88 100644
--- a/lib/Highlight.php
+++ b/lib/Highlight.php
@@ -24,7 +24,7 @@ class Highlight {
         $tokenList = $tokenizer->tokenize();
 
         foreach ($tokenList as $lineNumber => $tokens) {
-            if ($lineNumber === 20) {
+            if ($lineNumber === 24) {
                 var_export($tokens);
                 echo "\n";
                 die();
diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php
index bc3ca33..9d6365f 100644
--- a/lib/Tokenizer.php
+++ b/lib/Tokenizer.php
@@ -118,8 +118,10 @@ class Tokenizer {
 
                     // If the rule is a Pattern
                     if ($rule instanceof Pattern) {
-                        // Throw out pattern regexes with anchors that should match the current line.
-                        // This is necessary because the tokenizer is fed data line by line.
+                        // Throw out pattern regexes with anchors that shouldn't match the current line.
+                        // This is necessary because the tokenizer is fed data line by line, and
+                        // therefore anchors that match the beginning or the end of the whole document
+                        // won't do anything.
                         if (preg_match(self::ANCHOR_CHECK_REGEX, $rule->match, $validRegexMatch) === 1) {
                             if (
                                 // \A anchors match the beginning of the whole string, not just this line
@@ -201,7 +203,10 @@ class Tokenizer {
                 // create tokens from the captures.
                 if ($pattern->captures !== null) {
                     foreach ($match as $k => $m) {
-                        if ($m[0] === '' || ($k === 0 && !isset($pattern->captures[0]))) {
+                        // Continue onto the next match if this one is empty, if $m[1] (presumably
+                        // its offset) is negative, if the pattern has no capture for its index, or
+                        // if it is the first match (index 0) and there is no capture for it.
+                        if ($m[0] === '' || $m[1] < 0 || !isset($pattern->captures[$k]) || ($k === 0 && !isset($pattern->captures[0]))) {
                             continue;
                         }
 
@@ -345,8 +350,10 @@ class Tokenizer {
                 if ($pattern->patterns !== null && $this->offset < $lineLength) {
                     // If the pattern has just a regular match (meaning neither a begin nor an end
                     // pattern) but has subpatterns then only tokenize the part of the line that's
-                    // within the match.
-                    $tokens = [ ...$tokens, ...$this->tokenizeLine($line, (!$pattern->beginPattern && !$pattern->endPattern) ? strlen($match[0][0]) : 0) ];
+                    // within the match. Otherwise, tokenize up to the line's length. Because of
+                    // recursion, the line length could be set by this step before or within the
+                    // capture tokenization process.
+                    $tokens = [ ...$tokens, ...$this->tokenizeLine($line, (!$pattern->beginPattern && !$pattern->endPattern) ? strlen($match[0][0]) : $lineLength) ];
                 }
 
                 // If the offset is before the end of the match then create a token from the