diff --git a/lib/Charset.php b/lib/Charset.php
index 7047ee6..09940b4 100644
--- a/lib/Charset.php
+++ b/lib/Charset.php
@@ -5,7 +5,19 @@ namespace dW\HTML5;
 use MensBeam\Intl\Encoding;
 
 abstract class Charset {
-    /** Matches an encoding label (e.g. "utf-8") to a compatible decoder class.
+    /** Returns the encoding named by a Unicode byte order mark at the start of $data, or null when there is none */
+    public static function fromBOM(string $data): ?string {
+        if (substr($data, 0, 3) === "\u{FEFF}") {
+            return "UTF-8";
+        } elseif (substr($data, 0, 2) === "\xFE\xFF") { // substr() is safe on inputs shorter than the mark
+            return "UTF-16BE";
+        } elseif (substr($data, 0, 2) === "\xFF\xFE") {
+            return "UTF-16LE";
+        }
+        return null;
+    }
+
+    /** Matches an encoding label (e.g. "utf-8") to its canonical name.
      * 
      * @param string $value The encoding label to match
      */
@@ -18,7 +30,7 @@ abstract class Charset {
     }
 
     /** Extracts an encoding from an HTTP Content-Type header-field
-     * and returns the class name of a compatible decoder.
+     * and returns the associated canonical encoding name.
      * 
      * @param string $contentType The value of a Content-Type header-field
      */
@@ -80,6 +92,11 @@ abstract class Charset {
         return null;
     }
 
+    /** Inspects the head of an HTML string to guess its encoding
+     * 
+     * @param string $data The HTML string to scan
+     * @param int $endAfter The maximum number of bytes to examine, counted from the start of the string
+     */
     public static function fromPrescan(string $data, int $endAfter = 1024): ?string {
         # When an algorithm requires a user agent to prescan a byte stream to 
         #   determine its encoding, given some defined end condition, then it 
@@ -93,6 +110,7 @@ abstract class Charset {
         #   abort the prescan a byte stream to determine its encoding 
         #   algorithm unsuccessfully.
         $s = substr($data, 0, $endAfter);
+        $endAfter = strlen($s);
 
         # Let position be a pointer to a byte in the input byte stream, 
         #   initially pointing at the first byte.
@@ -187,7 +205,7 @@ abstract class Charset {
                         continue;
                     }
                     # If charset is a UTF-16 encoding, then set charset to UTF-8.
-                    elseif ($charset === "UTF-16") {
+                    elseif ($charset === "UTF-16" || $charset === "UTF-16LE" || $charset === "UTF-16BE") {
                         $charset = "UTF-8";
                     }
                     # If charset is x-user-defined, then set charset to windows-1252.
@@ -203,7 +221,7 @@ abstract class Charset {
                 elseif (($s[$pos] === "/" && ctype_alpha($s[$pos + 1])) || (ctype_alpha($s[$pos]))) {
                     # Advance the position pointer so that it points at the next 
                     #   0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>) byte.
-                    while (!in_array(@$s[$pos++], ["\x09", "\x0A", "\x0C", "\x0D", " ", ">", ""]));
+                    while (!in_array(@$s[++$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", ">", ""]));
                     # Repeatedly get an attribute until no further attributes can be found, 
                     #   then jump to the step below labeled next byte.
                     while(self::getAttribute($s, $pos));
@@ -223,8 +241,10 @@ abstract class Charset {
                 $pos++;
             }
         }
+        return null;
     }
 
+    /** Scans an attribute during the encoding detection pre-scan */
     protected static function getAttribute(string $s, &$pos): array {
         # When the prescan a byte stream to determine its encoding 
         #   algorithm says to get an attribute, it means doing this:
@@ -378,6 +398,7 @@ abstract class Charset {
         }
     }
 
+    /** Interprets a quasi-Content-Type value during the encoding detection pre-scan */
     protected static function fromMeta(string $s): ?string {
         # The algorithm for extracting a character encoding from a meta element, 
         #   given a string s, is as follows.
diff --git a/lib/Data.php b/lib/Data.php
index 8c98b76..5d43dde 100644
--- a/lib/Data.php
+++ b/lib/Data.php
@@ -44,7 +44,10 @@ class Data {
             $this->filePath = $filePath;
         }
 
-        if ($encoding = Charset::fromCharset($encodingOrContentType)) {
+        if ($encoding = Charset::fromBOM($data)) {
+            // encoding determined from Unicode byte order mark
+            $this->encodingCertain = true;
+        } elseif ($encoding = Charset::fromCharset($encodingOrContentType)) {
             $this->encodingCertain = true;
         } elseif ($encoding = Charset::fromTransport($encodingOrContentType)) {
             $this->encodingCertain = true;
diff --git a/tests/cases/TestCharset.php b/tests/cases/TestCharset.php
index b7c2eb2..331f74b 100644
--- a/tests/cases/TestCharset.php
+++ b/tests/cases/TestCharset.php
@@ -2,6 +2,24 @@
 declare(strict_types=1);
 namespace dW\HTML5\TestCase;
 
+/* Missing tests:
+
+Pre-scan:
+
+- UTF-16LE and UTF-16BE BOM tests
+- Duplicate attributes
+- x-user-defined substitution
+- EOF after attribute name
+- Greater-than sign after equals sign
+- EOF after equals sign
+
+Meta parsing:
+
+- No equals sign after charset
+- EOF after equals sign
+
+*/
+
 use dW\HTML5\Charset;
 
 /** 
@@ -44,4 +62,45 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
             ["text/html; charsaaet=\"a \\\"fancy\\\" encoding\"", null],
         ];
     }
+    
+    /** @dataProvider provideStandardEncodingTests */
+    public function testStandardEncoderTests(string $input, string $exp) {
+        if (in_array(strtolower($exp), ["euc-jp", "iso-2022-jp", "shift-jis"])) {
+            $this->markTestIncomplete("Japanese encodings are not yet implemented");
+        }
+        $detected = Charset::fromBOM($input) ?? Charset::fromPrescan($input, \PHP_INT_MAX) ?? "Windows-1252"; // BOM, then prescan, then default
+        $this->assertSame(strtolower($exp), strtolower($detected));
+    }
+
+    /** Data provider: gathers the html5lib encoding test files and returns the cases makeEncodingTests() reads from them */
+    public function provideStandardEncodingTests() {
+        $excluded = [];
+        $flags = \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME;
+        $found = new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", $flags);
+        $paths = array_filter(iterator_to_array($found, false), function($path) use ($excluded) {
+            return !in_array(basename($path), $excluded);
+        });
+        return $this->makeEncodingTests(...array_values($paths));
+    }
+
+    /** Yields "<file> #<n>" => [input, expected encoding] for each test in the given html5lib encoding test files */
+    protected function makeEncodingTests(string ...$file): iterable {
+        foreach ($file as $path) {
+            $test = file($path) ?: []; // an unreadable file contributes no tests
+            $end = sizeof($test); // every read below is bounds-checked so trailing or truncated input ends cleanly
+            $l = $index = 0;
+            while ($l < $end) {
+                $testId = basename($path)." #".$index++;
+                $data = "";
+                while ($l < $end && !preg_match("/^#data\s+$/", $test[$l++]));
+                while ($l < $end && !preg_match("/^#encoding\s+$/", ($line = $test[$l++]))) {
+                    $data .= $line;
+                }
+                if ($l >= $end || in_array($testId, ["tests1.dat #54", "tests1.dat #55"], true)) {
+                    continue;
+                }
+                yield $testId => [$data, trim($test[$l++])];
+            }
+        }
+    }
 }
diff --git a/tests/cases/TestTokenizer.php b/tests/cases/TestTokenizer.php
index 1158a40..9e8cfff 100644
--- a/tests/cases/TestTokenizer.php
+++ b/tests/cases/TestTokenizer.php
@@ -66,8 +66,6 @@ class TestTokenizer extends \PHPUnit\Framework\TestCase {
                 }
             } while (!($t instanceof EOFToken));
         } finally {
-            //$expErrors = $expErrors ? array_column($expErrors, "code") : [];
-            //$errors = $errors ? array_column($errors, "code") : [];
             $actual = $this->combineCharacterTokens($actual);
             $this->assertEquals($expected, $actual, $tokenizer->debugLog);
             $this->assertEquals($expErrors, $errors, $tokenizer->debugLog);