Browse Source

More sanitization progress

Script to scrape attribute list needs more work to deal with globals
master
J. King 1 year ago
parent
commit
eee4d65faf
  1. 32
      RoboFile.php
  2. 71
      lib/AbstractSanitizer.php

32
RoboFile.php

@ -160,28 +160,54 @@ class RoboFile extends \Robo\Tasks {
/**
 * Prints PHP constant definitions listing the elements and attributes
 * defined by the WHATWG HTML specification
 *
 * The single-page specification is downloaded and parsed, and every
 * "dfn" element is inspected: those typed "element" contribute an element
 * name, while those typed "element-attr" contribute an attribute name
 * along with the elements listed in their "data-dfn-for" attribute.
 *
 * The output is two lines of PHP source (KNOWN_ELEMENTS_HTML and
 * KNOWN_ATTRIBUTES_HTML) written to standard output. If the download
 * fails both constants are printed as empty arrays.
 *
 * @return Result The result of running the collection
 */
public function constants(): Result {
    $c = $this->collectionBuilder()->addCode(function() {
        $elems = [];
        $attrs = [];
        // retrieve the single-page HTML specification (this is around 15MB in size)
        $spec = file_get_contents("https://html.spec.whatwg.org/");
        // parse it (this may take several seconds)
        if ($spec) {
            $p = new DOMParser;
            $document = $p->parseFromString($spec, "text/html;charset=utf-8");
            // pick out element and attribute definitions from the specification
            foreach ($document->getElementsByTagName("dfn") as $el) {
                $type = $el->getAttribute("data-dfn-type");
                if ($type === "element") {
                    $elems[] = trim($el->textContent);
                } elseif ($type === "element-attr") {
                    $name = trim($el->textContent);
                    if (preg_match('/\s/', $name)) {
                        // skip the definition if the name is not valid
                        continue;
                    }
                    // register the attribute even if it has no context, so
                    // that context-free definitions still appear in the output
                    $attrs[$name] = $attrs[$name] ?? [];
                    $context = $el->getAttribute("data-dfn-for") ?? "";
                    if (strlen($context)) {
                        // the context is a comma-separated list of element names
                        foreach (explode(",", $context) as $for) {
                            $for = trim($for);
                            if (strlen($for)) {
                                $attrs[$name][] = $for;
                            }
                        }
                    }
                }
            }
        }
        // sort and filter the results for uniqueness
        sort($elems);
        ksort($attrs);
        $elems = array_unique($elems);
        $attrs = array_map(function($v) {
            // the same attribute may be defined more than once for an element
            $v = array_unique($v);
            sort($v);
            return $v;
        }, $attrs);
        // format the lists as PHP arrays
        $elems = array_map(function($e) {
            return "'$e' => true";
        }, $elems);
        $elems = implode(", ", $elems);
        $attrList = [];
        foreach ($attrs as $name => $context) {
            // quote each entry individually so that an empty context list
            // is rendered as [] rather than as a list holding an empty string
            $quoted = array_map(function($for) {
                return "\"".$for."\"";
            }, $context);
            $attrList[] = "'$name' => [".implode(", ", $quoted)."]";
        }
        $attrs = implode(", ", $attrList);
        echo "protected const KNOWN_ELEMENTS_HTML = [".$elems."];\n";
        echo "protected const KNOWN_ATTRIBUTES_HTML = [".$attrs."];\n";
    });
    return $c->run();
}

71
lib/AbstractSanitizer.php

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save