HTML-Sanitizer/RoboFile.php
2023-04-22 19:18:15 -04:00

219 lines
9.4 KiB
PHP

<?php
use Robo\Result;
use MensBeam\HTML\DOMParser;
// Directory containing this file, with a trailing directory separator
const BASE = __DIR__.\DIRECTORY_SEPARATOR;
// The "tests" directory beneath BASE, with a trailing directory separator
const BASE_TEST = BASE."tests".\DIRECTORY_SEPARATOR;
// Platform flags. IS_WIN is true when PHP defines PHP_WINDOWS_VERSION_MAJOR;
// IS_MAC is true when the operating system name reported by php_uname() is "Darwin"
define("IS_WIN", defined("PHP_WINDOWS_VERSION_MAJOR"));
define("IS_MAC", php_uname("s") === "Darwin");
// NOTE: IS_LINUX is simply "neither Windows nor macOS", so it is also true on other systems (e.g. BSDs)
define("IS_LINUX", !IS_WIN && !IS_MAC);
// Turn off all PHP error reporting for the tasks defined in this file
error_reporting(0);
// Load the autoloader from the vendor directory (presumably the one generated by Composer)
require_once BASE."vendor".DIRECTORY_SEPARATOR."autoload.php";
/** Normalizes a filesystem path
 *
 * When the path can be resolved by realpath() its canonical form is returned;
 * otherwise the input is returned with every forward slash and backslash
 * replaced by the platform's directory separator.
 *
 * @param string $path The path to normalize
 */
function norm(string $path): string {
    $resolved = realpath($path);
    // realpath() yields false for paths which cannot be resolved; fall back to separator normalization
    return $resolved ?: strtr($path, [
        "/"  => \DIRECTORY_SEPARATOR,
        "\\" => \DIRECTORY_SEPARATOR,
    ]);
}
class RoboFile extends \Robo\Tasks {
    /** Runs the typical test suite
     *
     * Arguments passed to the task are passed on to PHPUnit. Thus one may, for
     * example, run the following command and get the expected results:
     *
     * ./robo test --testsuite TTRSS --exclude-group slow --testdox
     *
     * Please see the PHPUnit documentation for available options.
     */
    public function test(array $args): Result {
        return $this->runTests(escapeshellarg(\PHP_BINARY), "typical", $args);
    }

    /** Runs the full test suite
     *
     * This includes pedantic tests which may help to identify problems.
     * See help for the "test" task for more details.
     */
    public function testFull(array $args): Result {
        return $this->runTests(escapeshellarg(\PHP_BINARY), "full", $args);
    }

    /**
     * Runs a quick subset of the test suite
     *
     * See help for the "test" task for more details.
     */
    public function testQuick(array $args): Result {
        return $this->runTests(escapeshellarg(\PHP_BINARY), "quick", $args);
    }

    /** Produces a code coverage report
     *
     * By default this task produces an HTML-format coverage report in
     * tests/coverage/. Additional reports may be produced by passing
     * arguments to this task as one would to PHPUnit.
     *
     * Robo first tries to use pcov and will fall back to xdebug.
     * Neither pcov nor xdebug need to be enabled to be used; they
     * only need to be present in the extension load path to be used.
     */
    public function coverage(array $args): Result {
        // run tests with code coverage reporting enabled
        $exec = $this->findCoverageEngine();
        return $this->runTests($exec, "coverage", array_merge(["--coverage-html", BASE_TEST."coverage"], $args));
    }

    /** Produces a code coverage report, with redundant tests
     *
     * Depending on the environment, some tests that normally provide
     * coverage may be skipped, while working alternatives are normally
     * suppressed for reasons of time. This coverage report will try to
     * run all tests which may cover code.
     *
     * See also help for the "coverage" task for more details.
     */
    public function coverageFull(array $args): Result {
        // run tests with code coverage reporting enabled
        $exec = $this->findCoverageEngine();
        return $this->runTests($exec, "typical", array_merge(["--coverage-html", BASE_TEST."coverage"], $args));
    }

    /** Finds the first suitable means of computing code coverage, either pcov or xdebug.
     *
     * Extensions which are already loaded are preferred; failing that, the
     * extension directory is searched for a loadable pcov or xdebug module.
     * If neither is available the plain PHP command is returned.
     *
     * @return string The shell-escaped PHP binary followed by any "-d" settings required to enable the coverage engine
     */
    protected function findCoverageEngine(): string {
        // strip a trailing separator of either flavour, so that a Windows-style path does not end up with a doubled separator
        $dir = rtrim((string) ini_get("extension_dir"), "/\\").\DIRECTORY_SEPARATOR;
        $ext = IS_WIN ? "dll" : "so";
        $php = escapeshellarg(\PHP_BINARY);
        $code = escapeshellarg(BASE."lib");
        if (extension_loaded("pcov")) {
            return "$php -d pcov.enabled=1 -d pcov.directory=$code";
        } elseif (extension_loaded("xdebug")) {
            return "$php -d xdebug.mode=coverage";
        } elseif (file_exists($dir."pcov.$ext")) {
            return "$php -d extension=pcov.$ext -d pcov.enabled=1 -d pcov.directory=$code";
        } elseif (file_exists($dir."xdebug.$ext")) {
            return "$php -d zend_extension=xdebug.$ext -d xdebug.mode=coverage";
        } else {
            return $php;
        }
    }

    /** Returns the necessary shell arguments to print error output or all output to the bitbucket
     *
     * @param bool $all Whether all output (true) or only error output (false) should be suppressed
     */
    protected function blackhole(bool $all = false): string {
        $hole = IS_WIN ? "nul" : "/dev/null";
        return $all ? ">$hole 2>&1" : "2>$hole";
    }

    /** Executes PHPUnit, used by the test and coverage tasks.
     *
     * @param string $executor The path to the PHP binary to execute with any required extra arguments. Normally this is either "php" or the result of findCoverageEngine()
     * @param string $set The set of tests to run, either "typical" (excludes redundant tests), "quick" (excludes redundant and slow tests), "coverage" (excludes tests not needed for coverage), or "full" (all tests)
     * @param array $args Extra arguments passed by Robo from the command line
     * @throws \Exception If $set is not one of the recognized test sets
     */
    protected function runTests(string $executor, string $set, array $args): Result {
        switch ($set) {
            case "typical":
                $set = ["--exclude-group", "optional"];
                break;
            case "quick":
                $set = ["--exclude-group", "optional,slow"];
                break;
            case "coverage":
                $set = ["--exclude-group", "optional,coverageOptional"];
                break;
            case "full":
                $set = [];
                break;
            default:
                // name the offending value so that a typo in a caller is easy to diagnose
                throw new \Exception("Unknown test set \"$set\"");
        }
        $execpath = norm(BASE."vendor-bin/phpunit/vendor/phpunit/phpunit/phpunit");
        // prefer the distributed configuration file when it exists, otherwise use phpunit.xml
        $confpath = realpath(BASE_TEST."phpunit.dist.xml") ?: norm(BASE_TEST."phpunit.xml");
        return $this->taskExec($executor)->option("-d", "zend.assertions=1")->arg($execpath)->option("-c", $confpath)->args(array_merge($set, $args))->run();
    }

    /** Manually updates the imported web-platform-tests test suite */
    public function testUpdate(): Result {
        $repos = [
            'platform-tests' => "https://github.com/web-platform-tests/wpt",
        ];
        $c = $this->collectionBuilder();
        foreach ($repos as $dir => $url) {
            $dir = BASE_TEST.$dir;
            // update an existing checkout, or create one if it does not exist yet
            if (is_dir($dir)) {
                $c->addTask($this->taskGitStack()->dir($dir)->pull());
            } else {
                $c->addTask($this->taskGitStack()->cloneRepo($url, $dir));
            }
        }
        // run the "parsetests" program on the checkout, redirecting its output to the JSON test-case file
        $c->addTask($this->taskExec(BASE_TEST."parsetests")->arg(BASE_TEST."platform-tests")->rawArg(">")->arg(BASE_TEST."cases".DIRECTORY_SEPARATOR."std-sanitize.json"));
        return $c->run();
    }

    /** Generates the lists of known HTML elements and attributes
     *
     * The HTML specification is read from a file named "html.html" in the
     * current working directory. Element and attribute definitions are
     * extracted from it and printed to standard output as the PHP constant
     * declarations KNOWN_ELEMENTS_HTML and KNOWN_ATTRIBUTES_HTML.
     *
     * If the specification cannot be read a warning is written to standard
     * error and the printed lists are empty.
     */
    public function constants(): Result {
        $c = $this->collectionBuilder()->addCode(function() {
            $elems = [];
            $attrs = [];
            // retrieve the single-page HTML specification (this is around 12MB in size)
            $spec = file_get_contents("html.html"); //$spec = file_get_contents("https://html.spec.whatwg.org/");
            if (!$spec) {
                // error reporting is disabled in this file, so report the failure explicitly rather than silently printing empty lists
                fwrite(\STDERR, "Warning: html.html could not be read or is empty; the generated lists will be empty\n");
            } else {
                // parse it (this may take several seconds)
                $p = new DOMParser;
                $document = $p->parseFromString($spec, "text/html;charset=utf-8");
                // pick out element and attribute definitions from the specification
                foreach ($document->getElementsByTagName("dfn") as $el) {
                    $type = $el->getAttribute("data-dfn-type");
                    if ($type === "element") {
                        $elems[] = trim($el->textContent);
                    } elseif ($type === "element-attr") {
                        $name = trim($el->textContent);
                        if (preg_match('/\s/', $name)) {
                            // skip the definition if the name is not valid
                            continue;
                        }
                        $attrs[$name] = $attrs[$name] ?? [];
                        // the context is a comma-separated list of the elements the attribute applies to
                        $context = $el->getAttribute("data-dfn-for") ?? "";
                        foreach (explode(",", $context) as $for) {
                            $for = trim($for);
                            if (strlen($for)) {
                                $attrs[$name][] = $for;
                            }
                        }
                    }
                }
                // TODO: do a second pass over what's been collected to find global attributes and normalize them to a single "*" entry
            }
            //$spec = file_get_contents("aria.html"); //$spec = file_get_contents("https://www.w3.org/TR/wai-aria/");
            //$spec = file_get_contents("svg-elem.html"); //$spec = file_get_contents("https://www.w3.org/TR/SVG2/eltindex.html");
            //$spec = file_get_contents("svg-attr.html"); //$spec = file_get_contents("https://www.w3.org/TR/SVG2/attindex.html");
            // sort and filter the results for uniqueness
            sort($elems);
            ksort($attrs);
            $elems = array_unique($elems);
            $attrs = array_map(function($v) {
                // an attribute may be defined more than once for the same element
                $v = array_unique($v);
                sort($v);
                return $v;
            }, $attrs);
            // format the lists as PHP arrays
            $elems = array_map(function($e) {
                return "'$e' => true";
            }, $elems);
            $elems = implode(", ", $elems);
            $attrList = [];
            foreach ($attrs as $name => $context) {
                // an attribute without any context yields an empty list rather than a list holding one empty string
                $list = $context ? "\"".implode("\", \"", $context)."\"" : "";
                $attrList[] = "'$name' => [".$list."]";
            }
            $attrs = implode(", ", $attrList);
            echo "protected const KNOWN_ELEMENTS_HTML = [".$elems."];\n";
            echo "protected const KNOWN_ATTRIBUTES_HTML = [".$attrs."];\n";
        });
        return $c->run();
    }
}