J. King
3 years ago
6 changed files with 78 additions and 17 deletions
@ -0,0 +1,46 @@ |
|||||
|
<?php |
||||
|
/** @license MIT |
||||
|
* Copyright 2017 , Dustin Wilson, J. King et al. |
||||
|
* See LICENSE and AUTHORS files for details */ |
||||
|
|
||||
|
declare(strict_types=1); |
||||
|
namespace MensBeam\HTML\TestCase; |
||||
|
|
||||
|
use MensBeam\HTML\Parser; |
||||
|
use MensBeam\HTML\Parser\Output; |
||||
|
use MensBeam\HTML\Parser\Config; |
||||
|
|
||||
|
/** |
||||
|
* @covers \MensBeam\HTML\Parser |
||||
|
* @covers \MensBeam\HTML\Parser\TreeBuilder |
||||
|
* @covers \MensBeam\HTML\Parser\Data::changeEncoding |
||||
|
*/ |
||||
|
class TestEncodingChange extends \PHPUnit\Framework\TestCase {
    /**
     * Parses a document whose `<meta charset>` declaration appears after 1024
     * spaces and a `<title>`, then checks the encoding reported by the parser
     * and the decoded title text.
     *
     * NOTE(review): the 1024 spaces are presumably there to push the `<meta>`
     * past the encoding pre-scan window so that the declaration is only seen
     * during tree construction; confirm against the Parser implementation.
     *
     * @dataProvider provideEncodingChanges
     *
     * @param string $assumedEncoding Encoding label set as the parser's fallback encoding
     * @param string $statedEncoding  Encoding label written into the `<meta charset>` attribute
     * @param string $actualEncoding  Encoding label the parser output is expected to report
     * @param string $titleBytes      Raw bytes placed inside the `<title>` element of the input
     * @param string $titleUTF8       Expected text content of the `<title>` element, as UTF-8
     */
    public function testChangeEncodingWithCharset(string $assumedEncoding, string $statedEncoding, string $actualEncoding, string $titleBytes, string $titleUTF8): void {
        $in = "<!DOCTYPE html><html><head>".str_repeat(" ", 1024)."<title>$titleBytes</title><meta charset=$statedEncoding></head><body></body></html>";
        // if the input is some form of UTF-16, add the null bytes in the correct places;
        // the patterns deliberately omit the "u" modifier so that they operate on single bytes
        if ($assumedEncoding === "UTF-16BE") {
            // big-endian: the null byte precedes each input byte
            $in = preg_replace("/(.)/s", "\0$1", $in);
        } elseif ($assumedEncoding === "UTF-16LE") {
            // little-endian: the null byte follows each input byte
            $in = preg_replace("/(.)/s", "$1\0", $in);
        }
        // set up the test
        $conf = new Config;
        $conf->encodingFallback = $assumedEncoding;
        $out = Parser::parse($in, "", null, null, null, $conf);
        $this->assertInstanceOf(Output::class, $out);
        // check the output
        $this->assertSame($actualEncoding, $out->encoding);
        $this->assertSame($titleUTF8, $out->document->getElementsByTagName("title")[0]->textContent);
    }

    /**
     * Supplies the datasets for testChangeEncodingWithCharset().
     *
     * Each dataset is a list of: assumed (fallback) encoding, encoding stated
     * in the `<meta charset>`, encoding expected in the output, raw title
     * bytes, and the expected title text as UTF-8.
     *
     * NOTE(review): the UTF-16 rows expect UTF-8 in the output, which looks
     * like the HTML rule that a UTF-16 label found in a `<meta>` is treated as
     * UTF-8; confirm against the specification.
     *
     * The provider is static so that it also works with PHPUnit versions which
     * no longer accept non-static data providers.
     *
     * @return iterable<string, array{string, string, string, string, string}>
     */
    public static function provideEncodingChanges(): iterable {
        return [
            'windows-1252 to UTF-8, ASCII title'             => ["windows-1252", "UTF-8", "UTF-8", "ASCII title", "ASCII title"],
            'windows-1252 with stated UTF-16BE, ASCII title' => ["windows-1252", "UTF-16BE", "UTF-8", "ASCII title", "ASCII title"],
            'windows-1252 with stated UTF-16LE, ASCII title' => ["windows-1252", "UTF-16LE", "UTF-8", "ASCII title", "ASCII title"],
            'windows-1252 to UTF-8, non-ASCII title'         => ["windows-1252", "UTF-8", "UTF-8", "H\xC3\xA9", "H\u{E9}"],
        ];
    }
}
Loading…
Reference in new issue