Add missing tests for charset pre-scan
This commit is contained in:
parent
93f0e3cf73
commit
b4d2f21199
5 changed files with 48 additions and 6 deletions
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.dat -text diff
|
||||
*.test -text diff
|
|
@ -265,7 +265,7 @@ abstract class Charset {
|
|||
spaces:
|
||||
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
|
||||
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
|
||||
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
|
||||
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
|
||||
$pos++;
|
||||
}
|
||||
$char = @$s[$pos];
|
||||
|
@ -284,7 +284,7 @@ abstract class Charset {
|
|||
value:
|
||||
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
|
||||
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
|
||||
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
|
||||
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
|
||||
$pos++;
|
||||
}
|
||||
$char = @$s[$pos];
|
||||
|
@ -424,5 +424,5 @@ abstract class Charset {
|
|||
return self::fromCharset(substr($s, $pos, $size));
|
||||
}
|
||||
}
|
||||
}
|
||||
} // @codeCoverageIgnore
|
||||
}
|
||||
|
|
|
@ -86,7 +86,10 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
|
|||
public function provideStandardEncodingTests() {
|
||||
$tests = [];
|
||||
$blacklist = [];
|
||||
foreach (new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME) as $file) {
|
||||
$files = new \AppendIterator();
|
||||
$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
|
||||
$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
|
||||
foreach ($files as $file) {
|
||||
if (!in_array(basename($file), $blacklist)) {
|
||||
$tests[] = $file;
|
||||
}
|
||||
|
@ -111,7 +114,7 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
|
|||
if ($l >= $end) {
|
||||
return;
|
||||
}
|
||||
yield $testId => [trim($data), trim($test[$l++])];
|
||||
yield $testId => [trim($data, "\r\n"), trim($test[$l++])];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
35
tests/cases/encoding/mensbeam01.dat
Normal file
35
tests/cases/encoding/mensbeam01.dat
Normal file
|
@ -0,0 +1,35 @@
|
|||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta charset="x-user-defined">
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta charset="utf-8" charset="windows-1252">
|
||||
#encoding
|
||||
UTF-8
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta charset
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta charset=>
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset charset=">
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset charset=utf-8">
|
||||
#encoding
|
||||
UTF-8
|
|
@ -16,8 +16,10 @@
|
|||
</filter>
|
||||
|
||||
<testsuites>
|
||||
<testsuite name="Tokenizer">
|
||||
<testsuite name="Charset">
|
||||
<file>cases/TestCharset.php</file>
|
||||
</testsuite>
|
||||
<testsuite name="Tokenizer">
|
||||
<file>cases/TestTokenizer.php</file>
|
||||
</testsuite>
|
||||
<testsuite name="Tree">
|
||||
|
|
Loading…
Reference in a new issue