Browse Source

Better coverage of BOM-based detection

split-manual
J. King 4 years ago
parent
commit
1f3c33ad9e
  1. 4
      lib/Charset.php
  2. 18
      tests/cases/TestCharset.php

4
lib/Charset.php

@ -11,9 +11,9 @@ abstract class Charset {
if (substr($data, 0, 3) === "\u{FEFF}") {
return "UTF-8";
} elseif (substr($data, 0, 2) === "\xFE\xFF") {
return "UTF-6BE";
return "UTF-16BE";
} elseif (substr($data, 0, 2) === "\xFF\xFE") {
return "UTF-6LE";
return "UTF-16LE";
} else {
return null;
}

18
tests/cases/TestCharset.php

@ -41,8 +41,8 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
}
/** @dataProvider provideContentTypes */
public function testDetermineEncodingFromContentType(string $in, ?string $exp) {
$this->assertSame($exp, Charset::fromTransport($in));
public function testDetermineEncodingFromContentType(string $input, ?string $exp) {
$this->assertSame($exp, Charset::fromTransport($input));
}
public function provideContentTypes() {
@ -62,7 +62,21 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
["text/html; charsaaet=\"a \\\"fancy\\\" encoding\"", null],
];
}
/**
 * Asserts that Charset::fromBOM() yields the expected result for the supplied input.
 *
 * @dataProvider provideBOMs
 */
public function testDetermineEncodingFromByteOrderMark(string $input, ?string $exp) {
    $detected = Charset::fromBOM($input);
    $this->assertSame($exp, $detected);
}
/**
 * Supplies named data sets for byte-order-mark detection.
 *
 * Each data set is a pair: the raw input string, followed by the value the
 * detection is expected to return for it (null for input without a byte order mark).
 */
public function provideBOMs() {
    $cases = [];
    // "\u{FEFF}" is emitted by PHP as the UTF-8 encoding of U+FEFF
    $cases['UTF-8'] = ["\u{FEFF}Hello world!", "UTF-8"];
    $cases['UTF-16 (big-endian)'] = ["\xFE\xFF\0H\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d\0!", "UTF-16BE"];
    $cases['UTF-16 (little-endian)'] = ["\xFF\xFEH\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d\0!\0", "UTF-16LE"];
    $cases['No byte order mark'] = ["Hello world!", null];
    return $cases;
}
/** @dataProvider provideStandardEncodingTests */
public function testStandardEncoderTests(string $input, string $exp) {
$exp = strtolower($exp);

Loading…
Cancel
Save