Browse Source

Add missing tests for charset pre-scan

split-manual
J. King 3 years ago
parent
commit
b4d2f21199
  1. 2
      .gitattributes
  2. 6
      lib/Charset.php
  3. 7
      tests/cases/TestCharset.php
  4. 35
      tests/cases/encoding/mensbeam01.dat
  5. 4
      tests/phpunit.dist.xml

2
.gitattributes

@@ -0,0 +1,2 @@
*.dat -text diff
*.test -text diff

6
lib/Charset.php

@@ -265,7 +265,7 @@ abstract class Charset {
spaces:
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
$pos++;
}
$char = @$s[$pos];
@@ -284,7 +284,7 @@ abstract class Charset {
value:
# If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR),
# or 0x20 (SP) then advance position to the next byte, then, repeat this step.
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " ", "/"])) {
while (in_array(@$s[$pos], ["\x09", "\x0A", "\x0C", "\x0D", " "])) {
$pos++;
}
$char = @$s[$pos];
@@ -424,5 +424,5 @@ abstract class Charset {
return self::fromCharset(substr($s, $pos, $size));
}
}
}
} // @codeCoverageIgnore
}

7
tests/cases/TestCharset.php

@@ -86,7 +86,10 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
public function provideStandardEncodingTests() {
$tests = [];
$blacklist = [];
foreach (new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME) as $file) {
$files = new \AppendIterator();
$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\dW\HTML5\BASE."tests/cases/encoding/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
if (!in_array(basename($file), $blacklist)) {
$tests[] = $file;
}
@@ -111,7 +114,7 @@ class TestCharset extends \PHPUnit\Framework\TestCase {
if ($l >= $end) {
return;
}
yield $testId => [trim($data), trim($test[$l++])];
yield $testId => [trim($data, "\r\n"), trim($test[$l++])];
}
}
}

35
tests/cases/encoding/mensbeam01.dat

@@ -0,0 +1,35 @@
#data
<!DOCTYPE HTML>
<meta charset="x-user-defined">
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta charset="utf-8" charset="windows-1252">
#encoding
UTF-8
#data
<!DOCTYPE HTML>
<meta charset
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta charset=>
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta http-equiv="Content-Type" content="text/html; charset charset=">
#encoding
Windows-1252
#data
<!DOCTYPE HTML>
<meta http-equiv="Content-Type" content="text/html; charset charset=utf-8">
#encoding
UTF-8

4
tests/phpunit.dist.xml

@@ -16,8 +16,10 @@
</filter>
<testsuites>
<testsuite name="Tokenizer">
<testsuite name="Charset">
<file>cases/TestCharset.php</file>
</testsuite>
<testsuite name="Tokenizer">
<file>cases/TestTokenizer.php</file>
</testsuite>
<testsuite name="Tree">

Loading…
Cancel
Save