2021-02-12 21:05:48 -05:00
< ? php
declare ( strict_types = 1 );
namespace dW\HTML5\TestCase ;
2021-02-12 22:46:10 -05:00
use dW\HTML5\Data ;
use dW\HTML5\Document ;
use dW\HTML5\EOFToken ;
2021-02-13 10:29:07 -05:00
use dW\HTML5\LoopException ;
2021-02-14 21:57:44 -05:00
use dW\HTML5\NotImplementedException ;
2021-02-12 22:46:10 -05:00
use dW\HTML5\OpenElementsStack ;
use dW\HTML5\ParseError ;
2021-02-12 21:05:48 -05:00
use dW\HTML5\Parser ;
2021-02-12 22:46:10 -05:00
use dW\HTML5\TemplateInsertionModesStack ;
use dW\HTML5\Tokenizer ;
use dW\HTML5\TreeBuilder ;
2021-02-12 21:05:48 -05:00
/**
 * @covers \dW\HTML5\TreeBuilder
*/
class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
/** @var array Serialized tree lines; reset by serializeTree() and appended to by push() */
protected $out ;
/** @var int Current nesting depth; push() repeats its indent string this many times */
protected $depth ;
/**
 * Parses one html5lib tree-construction test and compares the serialized DOM with the expected tree.
 *
 * @dataProvider provideStandardTreeTests
 *
 * @param string $data The HTML input to parse
 * @param array $exp The expected tree, one serialized line per entry
 * @param array $errors The errors listed by the test file; not evaluated yet (see the TODO at the end)
 * @param string|null $fragment The fragment context as "name" or "prefix name", or a falsy value to parse a whole document
 */
public function testStandardTreeTests(string $data, array $exp, array $errors, $fragment): void {
    // certain tests need to be patched to ignore unavoidable limitations of PHP's DOM
    [$exp, $patched, $skip] = $this->patchTest($data, $fragment, $exp);
    if (strlen($skip)) {
        $this->markTestSkipped($skip);
    } elseif ($patched) {
        $this->markAsRisky();
    }
    // map each integer parse error code to its constant name, lowercased and hyphenated
    $errorMap = array_map(function($str) {
        return strtolower(str_replace("_", "-", $str));
    }, array_flip(array_filter((new \ReflectionClass(ParseError::class))->getConstants(), function($v) {
        return is_int($v);
    })));
    // create a stub error handler which collects parse errors; the collection is kept apart
    // from $errors so that the expected list supplied by the data provider is not overwritten
    $actualErrors = [];
    $errorHandler = $this->createStub(ParseError::class);
    $errorHandler->method("emit")->willReturnCallback(function($file, $line, $col, $code) use (&$actualErrors, $errorMap) {
        $actualErrors[] = ['code' => $errorMap[$code], 'line' => $line, 'col' => $col];
        return true;
    });
    // initialize the output document
    $doc = new Document;
    // prepare the fragment context, if any
    $fragmentContext = null;
    if ($fragment) {
        $fragment = explode(" ", $fragment);
        assert(sizeof($fragment) < 3);
        if (sizeof($fragment) === 1) {
            $fragmentContext = $doc->createElement($fragment[0]);
        } else {
            // a two-part context is "prefix name"; look the prefix up to find its namespace URI
            $ns = array_flip(Parser::NAMESPACE_MAP)[$fragment[0]] ?? null;
            assert(isset($ns));
            $fragmentContext = $doc->createElementNS($ns, $fragment[1]);
        }
    }
    // initialize the other classes we need
    $decoder = new Data($data, "STDIN", $errorHandler, "UTF-8");
    $stack = new OpenElementsStack($fragmentContext);
    $tokenizer = new Tokenizer($decoder, $stack, $errorHandler);
    $treeBuilder = new TreeBuilder($doc, $decoder, $tokenizer, $errorHandler, $stack, new TemplateInsertionModesStack, $fragmentContext);
    // run the tree builder
    try {
        do {
            $token = $tokenizer->createToken();
            $treeBuilder->emitToken($token);
        } while (!$token instanceof EOFToken);
    } catch (\DOMException $e) {
        // markTestSkipped() throws, so nothing after it in this handler would run
        $this->markTestSkipped('Requires implementation of the "Coercing an HTML DOM into an infoset" specification section');
    } catch (LoopException $e) {
        // compare the partial tree first so the failure output carries the debug log, then rethrow
        $act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp);
        $this->assertEquals($exp, $act, $e->getMessage() . "\n" . $treeBuilder->debugLog);
        throw $e;
    } catch (NotImplementedException $e) {
        $this->markTestSkipped($e->getMessage());
    }
    $act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp);
    $this->assertEquals($exp, $act, $treeBuilder->debugLog);
    // TODO: evaluate errors ($errors holds the expected list, $actualErrors the emitted ones)
}
2021-02-15 08:44:06 -05:00
/**
 * Adjusts a test's expected tree, or flags the test to be skipped, where PHP's DOM cannot represent the result.
 *
 * @param string $data The HTML input of the test
 * @param mixed $fragment The fragment context of the test; falsy for whole-document tests
 * @param array $exp The expected tree, one serialized line per entry
 * @return array A three-member list: the (possibly modified) expected tree, whether it was modified, and a skip message (empty string when the test should run)
 */
protected function patchTest(string $data, $fragment, array $exp): array {
    $patched = false;
    $skip = "";
    // comments outside the root element are silently dropped by the PHP DOM
    if (!$fragment) {
        // a top-level node has no indentation after the "| " line prefix
        $kept = array_values(array_filter($exp, function($line) {
            return strpos($line, "| <!--") !== 0;
        }));
        $patched = sizeof($kept) !== sizeof($exp);
        $exp = $kept;
    }
    $unsupported = [
        '<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>',
        '<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>',
        '<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>',
        '<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>',
    ];
    // strict comparison: the input must match one of the listed documents exactly
    if (in_array($data, $unsupported, true)) {
        $skip = 'Requires implementation of the "Coercing an HTML DOM into an infoset" specification section';
    }
    return [$exp, $patched, $skip];
}
2021-02-22 22:58:38 -05:00
/**
 * Pads the actual tree with filler lines so that it lines up with the expected tree.
 *
 * While the actual tree is shorter than the expected one, a filler line is
 * inserted at each position where the two differ. Padding stops once the
 * lengths match or the end of the actual tree is reached. This usually
 * results in clean PHPUnit comparison failure output.
 *
 * @param array $act The serialized lines of the actual tree
 * @param array $exp The serialized lines of the expected tree
 * @return array The actual tree, possibly with filler lines inserted
 */
protected function balanceTree(array $act, array $exp): array {
    $wanted = sizeof($exp);
    $i = 0;
    while ($i < sizeof($act) && sizeof($act) < $wanted) {
        $matches = isset($act[$i]) && $exp[$i] === $act[$i];
        if (!$matches) {
            // push the remainder of the actual tree down by one line
            array_splice($act, $i, 0, [" "]);
        }
        $i++;
    }
    return $act;
}
2021-02-12 21:05:48 -05:00
/**
 * Appends one line to the serialized tree at the current depth.
 *
 * Lines start with "| " so that they can compare equal to the expected lines,
 * which the data provider requires to begin with "|". The indent unit of two
 * spaces per level follows the html5lib tree-construction test format.
 *
 * @param string $data The text of the line, without prefix or indentation
 */
protected function push(string $data): void {
    $this->out[] = "| " . str_repeat("  ", $this->depth) . $data;
}
2021-02-18 11:24:28 -05:00
/**
 * Serializes a parsed document into html5lib tree-construction lines.
 *
 * @param Document $d The document to serialize
 * @param bool $fragment Whether the document holds a parsed fragment; if so only the children of the document element are serialized
 * @return array The serialized lines, in document order
 */
protected function serializeTree(Document $d, bool $fragment): array {
    $this->out = [];
    $this->depth = 0;
    if ($fragment) {
        // guard against a missing document element, as the whole-document branch below does
        if ($d->documentElement) {
            foreach ($d->documentElement->childNodes as $n) {
                $this->serializeNode($n);
            }
        }
    } else {
        if ($d->doctype) {
            $dt = "<!DOCTYPE ";
            // NOTE(review): a single-space name is presumably the tree builder's placeholder
            // for an empty doctype name; confirm against TreeBuilder
            $dt .= ($d->doctype->name !== " ") ? $d->doctype->name : "";
            // the identifiers are printed as a pair whenever either one is non-empty
            if (strlen($d->doctype->publicId) || strlen($d->doctype->systemId)) {
                $dt .= ' "' . $d->doctype->publicId . '"';
                $dt .= ' "' . $d->doctype->systemId . '"';
            }
            $dt .= ">";
            $this->push($dt);
        }
        if ($d->documentElement) {
            $this->serializeElement($d->documentElement);
        }
    }
    return $this->out;
}
/**
 * Serializes an element, its attributes (sorted by name) and its descendants.
 *
 * @param \DOMElement $e The element to serialize
 */
protected function serializeElement(\DOMElement $e): void {
    if ($e->namespaceURI) {
        // look the prefix up defensively so that an unknown namespace reaches the
        // assertion below instead of raising an undefined-index warning first
        $prefix = Parser::NAMESPACE_MAP[$e->namespaceURI] ?? null;
        assert((bool) $prefix, new \Exception("Prefix for namespace {$e->namespaceURI} is not defined"));
        // namespaced elements are written as "prefix localName"
        $prefix .= " ";
    } else {
        $prefix = "";
    }
    $this->push("<" . $prefix . $e->localName . ">");
    $this->depth++;
    $attr = [];
    foreach ($e->attributes as $a) {
        $attr[$a->name] = $a->value;
    }
    ksort($attr);
    foreach ($attr as $k => $v) {
        $this->push($k . '="' . $v . '"');
    }
    if ($e->localName === "template" && $e->namespaceURI === null) {
        // the nodes held in the template's content are listed one level deeper, under a "content" line
        $this->push("content");
        $this->depth++;
        foreach ($e->content->childNodes as $n) {
            $this->serializeNode($n);
        }
        $this->depth--;
    }
    foreach ($e->childNodes as $n) {
        $this->serializeNode($n);
    }
    $this->depth--;
}
2021-02-17 21:45:14 -05:00
/**
 * Serializes a single node, dispatching on its class.
 *
 * @param \DOMNode $n The node to serialize
 * @throws \Exception If the node is not an element, processing instruction, comment or other character data
 */
protected function serializeNode(\DOMNode $n): void {
    if ($n instanceof \DOMElement) {
        $this->serializeElement($n);
    } elseif ($n instanceof \DOMProcessingInstruction) {
        $this->push("<?" . $n->target . " " . $n->data . ">");
    } elseif ($n instanceof \DOMComment) {
        // comments are character data too, so they must be tested before the generic case
        $this->push("<!-- " . $n->data . " -->");
    } elseif ($n instanceof \DOMCharacterData) {
        $this->push('"' . $n->data . '"');
    } else {
        throw new \Exception("Node type " . get_class($n) . " not handled");
    }
}
/**
 * Reads the html5lib tree-construction test files and yields one data set per test.
 *
 * Each test is read as a "#data" section, an "#errors" section, an optional
 * "#script-on" or "#script-off" flag, an optional "#document-fragment" section
 * and a "#document" section. Tests flagged "#script-on" are not yielded.
 *
 * @return iterable Data sets of [input, expected tree lines, expected error lines, fragment context or null], keyed by file, test index and starting line
 */
public function provideStandardTreeTests(): iterable {
    $blacklist = [];
    $pattern = \dW\HTML5\BASE . "tests/html5lib-tests/tree-construction/*.dat";
    foreach (new \GlobIterator($pattern, \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME) as $file) {
        $index = 0;
        $l = 0;
        if (!in_array(basename($file), $blacklist, true)) {
            $lines = array_map(function($v) {
                return rtrim($v, "\n");
            }, file($file));
            while ($l < sizeof($lines)) {
                $pos = $l + 1;
                assert($lines[$l] === "#data", new \Exception("Test $file #$index does not start with #data tag at line " . ($l + 1)));
                // collect the test input
                $data = [];
                for (++$l; $l < sizeof($lines); $l++) {
                    if ($lines[$l] === "#errors") {
                        break;
                    }
                    $data[] = $lines[$l];
                }
                $data = implode("\n", $data);
                // collect the test errors
                $errors = [];
                assert(($lines[$l] ?? "") === "#errors", new \Exception("Test $file #$index does not list errors at line " . ($l + 1)));
                for (++$l; $l < sizeof($lines); $l++) {
                    if ($lines[$l] === "#new-errors") {
                        // the marker itself is dropped; the lines after it are still collected
                        continue;
                    } elseif (preg_match('/^#(document(-fragment)?|script-(on|off)|)$/', $lines[$l])) {
                        break;
                    }
                    $errors[] = $lines[$l];
                }
                // set the script mode, if present
                assert(preg_match('/^#(script-(on|off)|document(-fragment)?)$/', $lines[$l] ?? "") === 1, new \Exception("Test $file #$index follows errors with something other than script flag, document fragment, or document at line " . ($l + 1)));
                $script = null;
                if ($lines[$l] === "#script-off") {
                    $script = false;
                    $l++;
                } elseif ($lines[$l] === "#script-on") {
                    $script = true;
                    $l++;
                }
                // collect the document fragment, if present
                assert(preg_match('/^#document(-fragment)?$/', $lines[$l] ?? "") === 1, new \Exception("Test $file #$index follows script flag with something other than document fragment or document at line " . ($l + 1)));
                $fragment = null;
                if ($lines[$l] === "#document-fragment") {
                    $fragment = $lines[++$l];
                    $l++;
                }
                // collect the output tree
                $exp = [];
                assert(($lines[$l] ?? "") === "#document", new \Exception("Test $file #$index follows document fragment with something other than document at line " . ($l + 1)));
                for (++$l; $l < sizeof($lines); $l++) {
                    if ($lines[$l] === "" && ($lines[$l + 1] ?? "") === "#data") {
                        // a blank line followed by "#data" ends the test
                        break;
                    } elseif (($lines[$l][0] ?? "") !== "|") {
                        // a line not starting with "|" continues the previous tree line,
                        // so there must already be a line to continue
                        assert(sizeof($exp) > 0, new \Exception("Test $file #$index continues a tree line before any tree line was seen at line " . ($l + 1)));
                        $exp[sizeof($exp) - 1] .= "\n" . $lines[$l];
                        continue;
                    }
                    assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after document at line " . ($l + 1)));
                    $exp[] = $lines[$l];
                }
                if (!$script) {
                    // scripting-dependent tests are skipped entirely since we will not support scripting
                    yield "$file #$index (line $pos)" => [$data, $exp, $errors, $fragment];
                }
                // step over the blank separator line to the next "#data" tag
                $l++;
                $index++;
            }
        }
    }
}
}