You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
4.1 KiB
74 lines
4.1 KiB
<!DOCTYPE html>
<meta charset="gb18030">
<title>gb18030 test data generator</title>
<!--
  The inline script prints generated test data into the pre element below.
  The document is declared as gb18030 on purpose: part of the script reads
  back the query string of a URL assigned to an anchor element, and browsers
  percent-encode URL queries using the encoding of the document.
-->
<!-- Correct results are provided by Firefox -->
<pre></pre>
<script>
// Decoder test cases. Each entry supplies:
//   encoding - label handed to TextDecoder
//   input    - raw bytes to decode
//   name     - case label, emitted as the key of the generated output line
var data = [
    // basics
    { encoding: 'gb18030', input: [0x40], name: 'sanity check' },
    { encoding: 'gb18030', input: [0x80], name: 'special case for 0x80' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x37], name: 'four-byte special case' },
    { encoding: 'gb18030', input: [0xA8, 0x4E], name: 'two-byte character' },
    { encoding: 'gb18030', input: [0x82, 0x31, 0xA2, 0x37], name: 'four-byte character' },

    // input that ends part-way through a sequence
    { encoding: 'gb18030', input: [0x82], name: 'EOF after first byte' },
    { encoding: 'gb18030', input: [0x82, 0x30], name: 'EOF after second byte' },
    { encoding: 'gb18030', input: [0x82, 0x30, 0x81], name: 'EOF after third byte' },

    // 0xFF substituted at each position of a four-byte sequence
    { encoding: 'gb18030', input: [0xFF, 0x35, 0xF4, 0x37], name: 'bad first byte' },
    { encoding: 'gb18030', input: [0x81, 0xFF, 0xF4, 0x37], name: 'bad second byte' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xFF, 0x37], name: 'bad third byte' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0xFF], name: 'bad fourth byte' },

    // 0x00 substituted at each position of a four-byte sequence
    { encoding: 'gb18030', input: [0x00, 0x35, 0xF4, 0x37], name: 'control first byte' },
    { encoding: 'gb18030', input: [0x81, 0x00, 0xF4, 0x37], name: 'control second byte' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0x00, 0x37], name: 'control third byte' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x00], name: 'control fourth byte' },

    // the same eight cases, each followed by four 0x00 bytes
    { encoding: 'gb18030', input: [0xFF, 0x35, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad first byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0xFF, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad second byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xFF, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'bad third byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0xFF, 0x00, 0x00, 0x00, 0x00], name: 'bad fourth byte (padded)' },
    { encoding: 'gb18030', input: [0x00, 0x35, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control first byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0x00, 0xF4, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control second byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0x00, 0x37, 0x00, 0x00, 0x00, 0x00], name: 'control third byte (padded)' },
    { encoding: 'gb18030', input: [0x81, 0x35, 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00], name: 'control fourth byte (padded)' },

    // void sequences
    { encoding: 'gb18030', input: [0x84, 0x32, 0xA4, 0x39], name: 'void sequence' },
    { encoding: 'gb18030', input: [0xFE, 0x39, 0xFE, 0x39], name: 'void sequence 2' },
];
// Decode every test case and append one generated line per case to the <pre>:
//   'name' => ["HEXBYTES", [codepoint, codepoint, ...]],
data.forEach(function(entry) {
    // Upper-case, zero-padded hex dump of the input bytes, e.g. [0x81, 0x35] -> "8135".
    var bytes = entry.input
        .map((b) => b.toString(16).padStart(2, "0").toUpperCase())
        .join("");

    var text = new TextDecoder(entry.encoding).decode(new Uint8Array(entry.input));

    // Iterating a string walks it by code point, so a non-BMP character
    // contributes a single value instead of a high/low surrogate pair.
    var codes = Array.from(text, (ch) => ch.codePointAt(0)).join(", ");

    var line = "'" + entry.name + "' => [" + '"' + bytes + '", [' + codes + "]],\n";
    document.getElementsByTagName("pre")[0].appendChild(document.createTextNode(line));
});
// Blank lines separating the decoder results above from the encoder results below.
document.getElementsByTagName("pre")[0].appendChild(document.createTextNode("\n\n\n"));

// For each code point, place the character in a URL query, read the query
// back from an anchor element, and print "0x<CODEPOINT>, <HEXBYTES>" where
// HEXBYTES is the query with percent-escapes unwrapped to bare hex digits.
[0x64, 0x20AC, 0x2164, 0x3A74, 0xE7C7, 0x1D11E].forEach(function(code) {
    var l = document.createElement("a");
    l.href = "http://example.com/?" + String.fromCodePoint(code);
    // Drop the leading "?" from the query string.
    var url = l.search.slice(1);
    var bytes = "";
    for (let a = 0; a < url.length; a++) {
        if (url.charAt(a) == "%") {
            // "%XX": keep the two hex digits and step past them; the loop's
            // own increment then moves on to the character after the escape.
            bytes = bytes.concat(url.charAt(a + 1), url.charAt(a + 2));
            a = a + 2;
        } else {
            // Character left unescaped in the query: emit its code as two hex digits.
            bytes = bytes.concat(url.charCodeAt(a).toString(16).padStart(2, "0"));
        }
    }
    var line = "0x" + code.toString(16).toUpperCase() + ", " + bytes.toUpperCase() + "\n";
    document.getElementsByTagName("pre")[0].appendChild(document.createTextNode(line));
});
</script>