J. King
6 years ago
1 changed files with 0 additions and 67 deletions
@ -1,67 +0,0 @@ |
|||
<!DOCTYPE html> |
|||
<pre></pre> |
|||
<script> |
|||
// Decoder test vectors. Every element of `data` is an object of the shape
// { encoding, input, name }: `input` is the list of raw byte values and
// `name` is the label of the case. The vectors are written here as compact
// [name, bytes] pairs and expanded by the .map() at the bottom; all of them
// carry the 'utf-8' encoding label.
var data = [
    // Basic, well-formed input
    ['sanity check', [0x61, 0x62, 0x63, 0x31, 0x32, 0x33]],
    ['multibyte control', [0xE5, 0x8F, 0xA4, 0xE6, 0xB1, 0xA0, 0xE3, 0x82, 0x84, 0xE8, 0x9B, 0x99, 0xE9, 0xA3, 0x9B, 0xE3, 0x81, 0xB3, 0xE8, 0xBE, 0xBC, 0xE3, 0x82, 0x80, 0xE6, 0xB0, 0xB4, 0xE3, 0x81, 0xAE, 0xE9, 0x9F, 0xB3]],

    // Bad input
    ['invalid code', [0xFF]],
    ['ends early', [0xC0]],
    ['ends early 2', [0xE0]],
    ['invalid trail', [0xC0, 0x00]],
    ['invalid trail 2', [0xC0, 0xC0]],
    ['invalid trail 3', [0xE0, 0x00]],
    ['invalid trail 4', [0xE0, 0xC0]],
    ['invalid trail 5', [0xE0, 0x80, 0x00]],
    ['invalid trail 6', [0xE0, 0x80, 0xC0]],
    ['> 0x10FFFF', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
    ['obsolete lead byte', [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80]],

    // Overlong encodings
    ['overlong U+0000 - 2 bytes', [0xC0, 0x80]],
    ['overlong U+0000 - 3 bytes', [0xE0, 0x80, 0x80]],
    ['overlong U+0000 - 4 bytes', [0xF0, 0x80, 0x80, 0x80]],
    ['overlong U+0000 - 5 bytes', [0xF8, 0x80, 0x80, 0x80, 0x80]],
    ['overlong U+0000 - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
    ['overlong U+007F - 2 bytes', [0xC1, 0xBF]],
    ['overlong U+007F - 3 bytes', [0xE0, 0x81, 0xBF]],
    ['overlong U+007F - 4 bytes', [0xF0, 0x80, 0x81, 0xBF]],
    ['overlong U+007F - 5 bytes', [0xF8, 0x80, 0x80, 0x81, 0xBF]],
    ['overlong U+007F - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF]],
    ['overlong U+07FF - 3 bytes', [0xE0, 0x9F, 0xBF]],
    ['overlong U+07FF - 4 bytes', [0xF0, 0x80, 0x9F, 0xBF]],
    ['overlong U+07FF - 5 bytes', [0xF8, 0x80, 0x80, 0x9F, 0xBF]],
    ['overlong U+07FF - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF]],
    ['overlong U+FFFF - 4 bytes', [0xF0, 0x8F, 0xBF, 0xBF]],
    ['overlong U+FFFF - 5 bytes', [0xF8, 0x80, 0x8F, 0xBF, 0xBF]],
    ['overlong U+FFFF - 6 bytes', [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF]],
    ['overlong U+10FFFF - 5 bytes', [0xF8, 0x84, 0x8F, 0xBF, 0xBF]],
    ['overlong U+10FFFF - 6 bytes', [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF]],

    // UTF-16 surrogates encoded as code points in UTF-8
    ['lead surrogate', [0xED, 0xA0, 0x80]],
    ['trail surrogate', [0xED, 0xB0, 0x80]],
    ['surrogate pair', [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80]],

    // Mixed input
    ['mixed sample', [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE]]
].map(([name, input]) => ({ encoding: 'utf-8', input: input, name: name }));
|||
// Writes one line of text per test case into the first <pre> element of the
// page. Each line has the form
//     'name' => ["\xNN\xNN...", [cp, cp, ...]],
// where the quoted string spells the input bytes as uppercase two-digit hex
// escapes and the bracketed list holds the decimal code points that
// TextDecoder produced for those bytes.
// NOTE(review): the line format looks like a PHP array entry meant to be
// pasted into another test suite -- confirm against the consumer.

// The output element does not change between cases, so look it up once.
const outputElement = document.getElementsByTagName("pre")[0];

data.forEach((testCase) => {
    // Spell every input byte as a literal "\xNN" escape; the backslash is part
    // of the emitted text, not a JavaScript escape.
    const bytes = testCase.input
        .map((byte) => "\\x" + byte.toString(16).padStart(2, "0").toUpperCase())
        .join("");

    // Decode with the encoding the test case declares instead of assuming
    // UTF-8, so a case added for another encoding yields correct values.
    // TextDecoder throws a RangeError for an unknown label and falls back to
    // "utf-8" when the label is undefined.
    const text = new TextDecoder(testCase.encoding).decode(new Uint8Array(testCase.input));

    // Iterating a string yields whole code points, so a non-BMP character
    // contributes a single entry instead of its two UTF-16 surrogate halves.
    const codes = Array.from(text, (character) => character.codePointAt(0)).join(", ");

    const line = `'${testCase.name}' => ["${bytes}", [${codes}]],\n`;
    outputElement.appendChild(document.createTextNode(line));
});
|||
</script> |
Loading…
Reference in new issue