You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
121 lines
7.0 KiB
121 lines
7.0 KiB
<!DOCTYPE html>
|
|
<meta charset=gb18030>
|
|
<!-- Correct results are provided by Firefox -->
|
|
<pre style="font-family: 'Consolas', monospace;"></pre>
|
|
<script>
|
|
"use strict";
|
|
var data = [
|
|
{ encoding: 'gb18030', input: [], name: 'empty string' },
|
|
{ encoding: 'gb18030', input: [0x40], name: 'sanity check' },
|
|
{ encoding: 'gb18030', input: [0x80], name: 'special case for 0x80' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x37], name: 'four-byte special case' },
|
|
{ encoding: 'gb18030', input: [0xA8, 0x4E], name: 'two-byte character' },
|
|
{ encoding: 'gb18030', input: [0x82, 0x31, 0xA2, 0x37], name: 'four-byte character' },
|
|
{ encoding: 'gb18030', input: [0x82], name: 'EOF after first byte' },
|
|
{ encoding: 'gb18030', input: [0x82, 0x30], name: 'EOF after second byte' },
|
|
{ encoding: 'gb18030', input: [0x82, 0x30, 0x81], name: 'EOF after third byte' },
|
|
{ encoding: 'gb18030', input: [0xFF, 0x35, 0xF4, 0x37], name: 'bad first byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0xFF, 0xF4, 0x37], name: 'bad second byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xFF, 0x37], name: 'bad third byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0xFF], name: 'bad fourth byte' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x35, 0xF4, 0x37], name: 'control first byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x00, 0xF4, 0x37], name: 'control second byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0x00, 0x37], name: 'control third byte' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x00], name: 'control fourth byte' },
|
|
{ encoding: 'gb18030', input: [0xFF, 0x35, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad first byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0xFF, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad second byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xFF, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad third byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0xFF, 0x00, 0x00, 0x00, 0x00], name: 'bad fourth byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x35, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control first byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x00, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control second byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0x00, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control third byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00], name: 'control fourth byte (padded)' },
|
|
{ encoding: 'gb18030', input: [0x84, 0x32, 0xA4, 0x39], name: 'void sequence' },
|
|
{ encoding: 'gb18030', input: [0xFE, 0x39, 0xFE, 0x39], name: 'void sequence 2' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x81, 0x81, 0x30], name: 'seek test 1' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x81, 0x80], name: 'seek test 2' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x81, 0x00], name: 'seek test 3' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x81, 0x81, 0x00], name: 'seek test 4' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x30, 0x30, 0x30], name: 'seek test 5' },
|
|
{ encoding: 'gb18030', input: [0x81, 0x30, 0x81, 0x81], name: 'seek test 6' },
|
|
{ encoding: 'gb18030', input: [0x30, 0x30, 0x81, 0x81], name: 'seek test 7' },
|
|
{ encoding: 'gb18030', input: [0xF8, 0x83, 0xFE, 0x80], name: 'seek test 8' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x81, 0x81, 0x30, 0x00, 0x00, 0x00, 0x00], name: 'seek test 1 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x81, 0x80, 0x00, 0x00, 0x00, 0x00], name: 'seek test 2 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00], name: 'seek test 3 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00], name: 'seek test 4 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00], name: 'seek test 5 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x81, 0x30, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00], name: 'seek test 6 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00], name: 'seek test 7 (padded)' },
|
|
{ encoding: 'gb18030', input: [0x00, 0x00, 0x00, 0x00, 0xF8, 0x83, 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00], name: 'seek test 8 (padded)' },
|
|
];
|
|
// For every test vector, decode its bytes with TextDecoder and append one
// line to the first <pre> element in the form:
//   'name' => ["HEX BYTES", [code points]],
data.forEach(function(testCase) {
  // Input bytes as space-separated, two-digit uppercase hex.
  const hexBytes = testCase.input
    .map((byte) => byte.toString(16).padStart(2, "0").toUpperCase())
    .join(" ");

  const text = new TextDecoder(testCase.encoding).decode(new Uint8Array(testCase.input));

  // Array.from walks the string by code point, so a non-BMP character is
  // reported once instead of as a surrogate pair; no manual skipping of
  // trailing low surrogates is needed.
  const codePoints = Array.from(text, (ch) => ch.codePointAt(0)).join(", ");

  const line = `'${testCase.name}' => ["${hexBytes}", [${codePoints}]],\n`;
  document.getElementsByTagName("pre")[0].appendChild(document.createTextNode(line));
});

// Blank lines separating this section of the output from the next one.
document.getElementsByTagName("pre")[0].appendChild(document.createTextNode("\n\n\n"));
|
|
|
|
/*
|
|
Char 0 U+007A (1 byte) Offset 0
|
|
Char 1 U+00A2 (2 bytes) Offset 1
|
|
Char 2 U+6C34 (3 bytes) Offset 3
|
|
Char 3 U+1D11E (4 bytes) Offset 6
|
|
Char 4 U+F8FF (3 bytes) Offset 10
|
|
Char 5 U+10FFFD (4 bytes) Offset 13
|
|
Char 6 U+FFFE (3 bytes) Offset 17
|
|
End of string at char 7, offset 20
|
|
*/
|
|
/**
 * Puts the given code point into the query of an <a> element's URL, reads the
 * serialized query back via `search`, and returns its bytes as hex.
 *
 * Percent-escapes ("%XX") contribute their two hex digits; any other character
 * contributes its own char code. NOTE(review): the query is presumably
 * encoded using the document's charset (gb18030 per the <meta> tag), which
 * is why this goes through the DOM — confirm against the URL Standard.
 *
 * @param {number} code - Unicode code point to place in the query.
 * @returns {string} Space-separated, two-digit uppercase hex bytes.
 */
function queryBytesHex(code) {
  const link = document.createElement("a");
  link.href = "http://example.com/?" + String.fromCodePoint(code);
  // Drop the leading "?" from the serialized query.
  const query = link.search.slice(1);
  const bytes = [];
  for (let i = 0; i < query.length; i++) {
    if (query.charAt(i) === "%") {
      bytes.push(query.slice(i + 1, i + 3));
      // Skip the two hex digits just consumed.
      i += 2;
    } else {
      bytes.push(query.charCodeAt(i).toString(16).padStart(2, "0"));
    }
  }
  return bytes.join(" ").toUpperCase();
}

/**
 * Appends text to the first <pre> element of the document.
 *
 * @param {string} text - Text to append as a new text node.
 */
function appendLine(text) {
  document.getElementsByTagName("pre")[0].appendChild(document.createTextNode(text));
}

// First list: print only the encoded bytes, one line per code point.
for (const code of [0x7A, 0xA2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE]) {
  appendLine(`${queryBytesHex(code)}\n`);
}

// Blank lines separating the two lists in the output.
appendLine("\n\n\n");

// Second list: print "0xCODE, BYTES", one line per code point.
for (const code of [0x64, 0x20AC, 0x2164, 0x3A74, 0xE7C7, 0x1D11E]) {
  appendLine(`0x${code.toString(16).toUpperCase()}, ${queryBytesHex(code)}\n`);
}
|
|
</script>
|
|
|