Add missing tests for charset pre-scan

This commit is contained in:
J. King 2021-03-17 15:35:44 -04:00
parent 93f0e3cf73
commit b4d2f21199
5 changed files with 48 additions and 6 deletions

2
.gitattributes vendored Normal file
View file

@@ -0,0 +1,2 @@
*.dat -text diff
*.test -text diff

View file

@@ -265,7 +265,7 @@ abstract class Charset {
spaces:
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
-while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
+while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
$pos++;
}
$char = @$s[$pos];
@@ -284,7 +284,7 @@ abstract class Charset {
value:
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
-while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
+while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
$pos++;
}
$char = @$s[$pos];
@@ -424,5 +424,5 @@ abstract class Charset {
return self::fromCharset(substr($s, $pos, $size));
}
}
-}
+} // @codeCoverageIgnore
}

View file

@@ -86,7 +86,10 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
public function provideStandardEncodingTests() {
$tests = [];
$blacklist = [];
-foreach (new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME) as $file) {
+$files = new \AppendIterator();
+$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
+$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
+foreach ($files as $file) {
if (!in_array(basename($file), $blacklist)) {
$tests[] = $file;
}
@@ -111,7 +114,7 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
if ($l >= $end) {
return;
}
-yield $testId => [trim($data), trim($test[$l++])];
+yield $testId => [trim($data, "\r\n"), trim($test[$l++])];
}
}
}

View file

@@ -0,0 +1,35 @@
#data
<!DOCTYPE HTML>
<meta charset="x-user-defined">
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta charset="utf-8" charset="windows-1252">
#encoding
UTF-8
#data
<!DOCTYPE HTML>
<meta charset
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta charset=>
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta http-equiv="Content-Type" content="text/html; charset charset=">
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta http-equiv="Content-Type" content="text/html; charset charset=utf-8">
#encoding
UTF-8

View file

@@ -16,8 +16,10 @@
</filter>
<testsuites>
<testsuite name="Tokenizer">
<testsuite name="Charset">
<file>cases/TestCharset.php</file>
</testsuite>
<testsuite name="Tokenizer">
<file>cases/TestTokenizer.php</file>
</testsuite>
<testsuite name="Tree">