diff --git a/composer.json b/composer.json
index 8e7dd20..ce2898a 100644
--- a/composer.json
+++ b/composer.json
@@ -12,9 +12,10 @@
     ],
     "require": {
         "php": "^7.4 || ^8.0",
+        "ext-dom": "*",
         "ext-intl": "*",
         "ext-json": "*",
-        "ext-dom": "*",
+        "ext-mbstring": "*",
         "docopt/docopt": "^1.0"
     },
     "autoload": {
diff --git a/composer.lock b/composer.lock
index adf82a0..2fa9735 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "eb84c086d7c773cf5f8ad5ad2b9e546e",
+    "content-hash": "7f3c88aa5023ebb6ebad3e513973d927",
     "packages": [
         {
             "name": "docopt/docopt",
@@ -65,10 +65,11 @@
     "prefer-lowest": false,
     "platform": {
         "php": "^7.4 || ^8.0",
+        "ext-dom": "*",
         "ext-intl": "*",
         "ext-json": "*",
-        "ext-dom": "*"
+        "ext-mbstring": "*"
     },
     "platform-dev": [],
-    "plugin-api-version": "2.1.0"
+    "plugin-api-version": "2.0.0"
 }
diff --git a/lib/Data.php b/lib/Data.php
index 54ffe3c..dd1dc35 100644
--- a/lib/Data.php
+++ b/lib/Data.php
@@ -8,22 +8,26 @@ namespace dW\Lit;
 
 
 class Data {
-    public static function fileToGenerator(string $filepath): \Generator {
+    public static function fileToGenerator(string $filepath, string $encoding = 'UTF-8'): \Generator {
         $lineNumber = 0;
         $fp = fopen($filepath, 'r');
         try {
-            while ($line = fgets($fp)) {
-                yield ++$lineNumber => $line;
+            while (($line = fgets($fp)) !== false) {
+                // Lines are converted to UTF-32 because everything in UTF-32 is 4 bytes, making
+                // converting byte offsets to character offsets as easy as dividing by 4.
+                yield ++$lineNumber => mb_convert_encoding($line, 'UTF-32', $encoding);
             }
         } finally {
             fclose($fp);
         }
     }
 
-    public static function stringToGenerator(string $string): \Generator {
+    public static function stringToGenerator(string $string, string $encoding = 'UTF-8'): \Generator {
         $string = explode("\n", $string);
         foreach ($string as $lineNumber => $line) {
-            yield $lineNumber + 1 => $line;
+            // Lines are converted to UTF-32 because everything in UTF-32 is 4 bytes, making
+            // converting byte offsets to character offsets as easy as dividing by 4.
+            yield $lineNumber + 1 => mb_convert_encoding($line, 'UTF-32', $encoding);
         }
     }
 }
\ No newline at end of file
diff --git a/lib/Grammar.php b/lib/Grammar.php
index 6af9875..7c74b12 100644
--- a/lib/Grammar.php
+++ b/lib/Grammar.php
@@ -148,7 +148,7 @@ class Grammar {
     }
 
 
-    protected function parseJSONPattern(array $pattern, string $filename): Pattern|Reference|\WeakReference|null {
+    protected function parseJSONPattern(array $pattern, string $filename): Pattern|Reference|null {
         if (isset($pattern['include'])) {
             if ($pattern['include'][0] === '#') {
                 return new RepositoryReference(substr($pattern['include'], 1), $this);
@@ -165,40 +165,82 @@ class Grammar {
             'ownerGrammar' => $this,
             'name' => null,
             'contentName' => null,
-            'begin' => null,
-            'end' => null,
             'match' => null,
             'patterns' => null,
             'captures' => null,
-            'beginCaptures' => null,
-            'endCaptures' => null,
-            'applyEndPatternLast' => false
+            'endPattern' => false
         ];
 
         $modified = false;
+
+        $applyEndPatternLast = false;
+        if (isset($pattern['applyEndPatternLast'])) {
+            $applyEndPatternLast = $pattern['applyEndPatternLast'];
+            // Accept only true, false, 0, or 1; anything else is reported as a type error.
+            if (!is_bool($applyEndPatternLast) && $applyEndPatternLast !== 0 && $applyEndPatternLast !== 1) {
+                throw new Exception(Exception::JSON_INVALID_TYPE, 'Boolean, 0, or 1', 'applyEndPatternLast', gettype($applyEndPatternLast), $filename);
+            }
+            $applyEndPatternLast = (bool)$applyEndPatternLast;
+        }
+
+        // Begin and end matches are handled in this implementation by parsing begin
+        // matches as regular matches and appending the end match as a pattern to the
+        // end of the pattern's patterns.
+        if (isset($pattern['begin'])) {
+            if (!isset($pattern['end'])) {
+                throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end');
+            }
+
+            $begin = $pattern['begin'];//str_replace('/', '\/', $pattern['begin']);
+            $p['match'] = $begin;//"/$begin/";
+            $modified = true;
+
+            // Resolve the end captures first: 'captures' is overwritten with
+            // beginCaptures further down, and reading it after that point would hand
+            // the begin captures to the end pattern. endCaptures wins over the shared
+            // 'captures'; null means the end pattern gets no captures at all.
+            $endCaptures = $pattern['endCaptures'] ?? $pattern['captures'] ?? null;
+
+            // beginCaptures takes precedence over the shared 'captures' for the begin
+            // match. When it is absent, 'captures' (if present) already holds the right
+            // value and is left as is.
+            if (isset($pattern['beginCaptures'])) {
+                $pattern['captures'] = $pattern['beginCaptures'];
+            }
+
+            $endPattern = [
+                'match' => $pattern['end'],//"/" . str_replace('/', '\/', $pattern['end']) . "/",
+                'endPattern' => true
+            ];
+
+            if ($endCaptures !== null) {
+                $endPattern['captures'] = $endCaptures;
+            }
+
+            if (isset($pattern['patterns'])) {
+                if ($applyEndPatternLast) {
+                    $pattern['patterns'][] = $endPattern;
+                } else {
+                    array_unshift($pattern['patterns'], $endPattern);
+                }
+            } else {
+                $pattern['patterns'] = [ $endPattern ];
+            }
+        }
+
         foreach ($pattern as $key => $value) {
             switch ($key) {
-                case 'applyEndPatternLast':
-                    if (!is_bool($value) || (!is_int($value) && ($value !== 0 && $value !== 1))) {
-                        throw new Exception(Exception::JSON_INVALID_TYPE, 'Boolean, 0, or 1', 'applyEndPatternLast', gettype($value), $filename);
-                    }
-
-                    $value = (bool)$value;
                 case 'name':
                 case 'contentName':
                     $p[$key] = $value;
                     $modified = true;
                 break;
-                case 'begin':
-                case 'end':
                 case 'match':
-                    $value = str_replace('/', '\/', $value);
-                    $p[$key] = "/$value/";
+                    //$value = str_replace('/', '\/', $value);
+                    $p['match'] = $value;//"/$value/";
                     $modified = true;
                 break;
                 case 'captures':
-                case 'beginCaptures':
-                case 'endCaptures':
                     if (!is_array($value)) {
                         throw new Exception(Exception::JSON_INVALID_TYPE, 'Array', $key, gettype($value), $filename);
                     }
@@ -240,7 +282,7 @@ class Grammar {
         return ($modified) ? new Pattern(...$p) : null;
     }
 
-    protected function parseJSONPatternList(array $list, string $filename): Pattern|PatternList|null {
+    protected function parseJSONPatternList(array $list, string $filename): ?PatternList {
         $result = [];
         foreach ($list as $pattern) {
             $p = $this->parseJSONPattern($pattern, $filename);
diff --git a/lib/Grammar/Pattern.php b/lib/Grammar/Pattern.php
index 976b8d6..c4434d4 100644
--- a/lib/Grammar/Pattern.php
+++ b/lib/Grammar/Pattern.php
@@ -11,29 +11,22 @@ use dW\Lit\Grammar;
 /** Contains patterns responsible for matching a portion of the document */
 class Pattern extends Rule {
     protected bool $_applyEndPatternLast = false;
-    protected ?string $_begin;
-    protected ?CaptureList $_beginCaptures;
     protected ?CaptureList $_captures;
     protected ?string $_contentName;
-    protected ?string $_end;
-    protected ?CaptureList $_endCaptures;
+    protected bool $_endPattern = false;
     protected ?string $_match;
     protected ?string $_name;
     protected \WeakReference $_ownerGrammar;
     protected ?PatternList $_patterns;
 
 
-    public function __construct(Grammar $ownerGrammar, ?string $name = null, ?string $contentName = null, ?string $begin = null, ?string $end = null, ?string $match = null, ?PatternList $patterns = null, ?CaptureList $captures = null, ?CaptureList $beginCaptures = null, ?CaptureList $endCaptures = null, bool $applyEndPatternLast = false) {
+    public function __construct(Grammar $ownerGrammar, ?string $name = null, ?string $contentName = null, ?string $match = null, ?PatternList $patterns = null, ?CaptureList $captures = null, bool $endPattern = false) {
         $this->_name = $name;
         $this->_contentName = $contentName;
-        $this->_begin = $begin;
-        $this->_end = $end;
         $this->_match = $match;
         $this->_patterns = $patterns;
         $this->_captures = $captures;
-        $this->_beginCaptures = $beginCaptures;
-        $this->_endCaptures = $endCaptures;
-        $this->_applyEndPatternLast = $applyEndPatternLast;
+        $this->_endPattern = $endPattern;
         $this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar);
     }
 }
\ No newline at end of file
diff --git a/lib/Highlight.php b/lib/Highlight.php
index 5ff651d..eb3e56f 100644
--- a/lib/Highlight.php
+++ b/lib/Highlight.php
@@ -9,26 +9,30 @@ use dW\Lit\Grammar\Exception;
 
 
 class Highlight {
-    public static function withFile(string $filepath, string $scopeName) {
-        return self::highlight(Data::fileToGenerator($filepath), $scopeName);
+    public static function withFile(string $filepath, string $scopeName, string $encoding = 'UTF-8') {
+        return self::highlight(Data::fileToGenerator($filepath, $encoding), $scopeName, $encoding);
     }
 
-    public static function withString(string $string, string $scopeName) {
-        return self::highlight(Data::stringToGenerator($string), $scopeName);
+    public static function withString(string $string, string $scopeName, string $encoding = 'UTF-8') {
+        return self::highlight(Data::stringToGenerator($string, $encoding), $scopeName, $encoding);
     }
 
 
-    protected static function highlight(\Generator $data, string $scopeName) {
+    protected static function highlight(\Generator $data, string $scopeName, string $encoding) {
         $grammar = GrammarRegistry::get($scopeName);
         if ($grammar === false) {
             throw new Exception(Exception::GRAMMAR_MISSING, $scopeName);
         }
 
-        $tokenizer = new Tokenizer($data, $grammar);
-        $tokenList = $tokenizer->tokenize();
-
-        foreach ($tokenList as $lineNumber => $line) {
-            echo "$lineNumber: $line\n";
-        }
+        $previousRegexEncoding = mb_regex_encoding(); // restored in the finally block
+        mb_regex_encoding('UTF-32');
+        try {
+            $tokenizer = new Tokenizer($data, $grammar, $encoding);
+            foreach ($tokenizer->tokenize() as $lineNumber => $line) {
+                echo "$lineNumber: $line\n";
+            }
+        } finally {
+            mb_regex_encoding($previousRegexEncoding);
+        }
     }
 }
\ No newline at end of file
diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php
index 2edd0c8..c80da82 100644
--- a/lib/Tokenizer.php
+++ b/lib/Tokenizer.php
@@ -14,13 +14,15 @@ use dW\Lit\Grammar\{
 
 class Tokenizer {
     protected \Generator $data;
+    protected string $encoding;
     protected Grammar $grammar;
     protected array $ruleStack;
     protected array $scopeStack;
-    
 
-    public function __construct(\Generator $data, Grammar $grammar) {
+
+    public function __construct(\Generator $data, Grammar $grammar, string $encoding) {
         $this->data = $data;
+        $this->encoding = $encoding;
         $this->grammar = $grammar;
         $this->ruleStack = [ $this->grammar ];
         $this->scopeStack = [ $this->grammar->scopeName ];
@@ -33,9 +35,9 @@ class Tokenizer {
 
     public function tokenize(): \Generator {
         $appendNewLine = true;
-
         foreach ($this->data as $lineNumber => $inputLine) {
-            $line = $inputLine;
+            yield $lineNumber => $this->_tokenize($inputLine);
+            /*$line = $inputLine;
             $lineWithNewLine = ($appendNewLine) ? "$line\n" : $line;
             $initialStackRuleLength = count($this->ruleStack);
             $position = 0;
@@ -47,20 +49,46 @@ class Tokenizer {
                 if ($position > mb_strlen($line)) {
                     break;
                 }
-            }
+            }*/
         }
     }
 
 
-    protected function getMatch(string $regex, string $line): ?array {
-        if (preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE) !== 1) {
+    protected function getMatch(string $regex, string $line, int $offset = 0): ?array {
+        // Using mbstring's regular expressions because it truly supports multibyte
+        // strings but also because the original implementation used Oniguruma.
+        // NOTE(review): the pattern is assumed to be UTF-8 (grammars are parsed from JSON) - confirm.
+        mb_ereg_search_init($line, mb_convert_encoding($regex, 'UTF-32', 'UTF-8'));
+
+        if ($offset !== 0) {
+            // UTF-32 uses 4 bytes per character; multiply by 4 to get the byte offset.
+            mb_ereg_search_setpos($offset * 4);
+        }
+
+        $pos = mb_ereg_search_pos();
+        if ($pos === false) {
             return null;
         }
 
+        // UTF-32 uses 4 bytes for every character; divide by 4 to get character
+        // offsets.
+        $length = $pos[1] / 4;
+        $pos = [ 'start' => $pos[0] / 4 ];
+        $pos['end'] = $pos['start'] + $length;
+
+        $match = mb_ereg_search_getregs();
+        // Convert the matches back to the original encoding.
+        foreach ($match as &$m) {
+            $m = mb_convert_encoding($m, $this->encoding, 'UTF-32');
+        }
+        // Break the reference so a later write to $m cannot alter the last group.
+        unset($m);
+
+        $match['offset'] = $pos;
         return $match;
     }
 
-    protected function tokenizeLine(string $inputLine): array {
+    protected function _tokenize(string $inputLine, int $offset = 0): array {
         $currentRules = end($this->ruleStack)->patterns->getIterator();
         $currentRulesCount = count($currentRules);
         $results = [];
@@ -70,31 +98,8 @@ class Tokenizer {
             while (true) {
                 $rule = $currentRules[$i];
                 if ($rule instanceof Pattern) {
-                    $matchMode = null;
-                    $regex = null;
-                    if ($rule->match !== null) {
-                        $regex = $rule->match;
-                        $matchMode = self::MATCH_MODE_SINGLE;
-                    } elseif ($rule->begin !== null) {
-                        $regex = $rule->begin;
-                        $matchMode = self::MATCH_MODE_BEGINEND;
-                    }
-
-                    if ($matchMode !== null && $match = $this->getMatch($regex, $line)) {
-                        $scopeStack = $this->scopeStack;
-                        if ($rule->name !== null) {
-                            $scopeStack[] = $rule->name;
-                        }
-                        if ($rule->contentName !== null) {
-                            $scopeStack[] = $rule->contentName;
-                        }
-
-                        die(var_export($rule));
-
-                        if ($matchMode === self::MATCH_MODE_BEGINEND) {
-                            $this->ruleStack[] = $rule;
-                            $this->scopeStack[] = $scopeStack;
-                        }
+                    if ($match = $this->getMatch($rule->match, $line, $offset)) {
+                        $offset = $match['offset']['end'];
                     }
                 } elseif ($rule instanceof Reference && $obj = $rule->get()) {
                     if ($obj instanceof PatternList) {
@@ -111,5 +116,7 @@ class Tokenizer {
                 break;
             }
         }
+
+        return $inputLine;
     }
 }
\ No newline at end of file