diff --git a/RoboFile.php b/RoboFile.php index 29c6c1f..6a093a8 100644 --- a/RoboFile.php +++ b/RoboFile.php @@ -159,10 +159,11 @@ class RoboFile extends \Robo\Tasks { public function constants(): Result { $c = $this->collectionBuilder()->addCode(function() { + $out = ['elem' => [], 'attr' => []]; $elems = []; - $atrs = []; - // retrieve the single-page HTML specification (this is around 15MB in size) - $spec = file_get_contents("https://html.spec.whatwg.org/"); + $attrs = []; + // retrieve the single-page HTML specification (this is around 12MB in size) + $spec = file_get_contents("html.html"); //$spec = file_get_contents("https://html.spec.whatwg.org/"); // parse it (this may take several seconds) if ($spec) { $p = new DOMParser; @@ -186,7 +187,11 @@ class RoboFile extends \Robo\Tasks { } } } + // do a second pass over what's been collected to find global attributes and normalize them to a single "*" entry } + //$spec = file_get_contents("aria.html"); //$spec = file_get_contents("https://www.w3.org/TR/wai-aria/"); + //$spec = file_get_contents("svg-elem.html"); //$spec = file_get_contents("https://www.w3.org/TR/SVG2/eltindex.html"); + //$spec = file_get_contents("svg-attr.html"); //$spec = file_get_contents("https://www.w3.org/TR/SVG2/attindex.html"); // sort and filter the results for uniqueness sort($elems); ksort($attrs); diff --git a/composer.json b/composer.json index 76699d3..93da626 100644 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "name": "mensbeam/html-sanitizr", + "name": "mensbeam/html-sanitizer", "description": "An implementation of the HTML Sanitizer API", "keywords": ["whatwg", "html", "html5", "sanitizer", "dom"], "type": "library", diff --git a/lib/AbstractSanitizer.php b/lib/AbstractSanitizer.php index 391c8c3..7c9fb86 100644 --- a/lib/AbstractSanitizer.php +++ b/lib/AbstractSanitizer.php @@ -777,6 +777,11 @@ abstract class AbstractSanitizer { # To determine the sanitize action for an attribute given a Sanitizer # configuration 
dictionary config, run these steps: # Let kind be attribute’s attribute kind. + # The attribute kind of an attribute is one of regular + # or unknown. Let attribute kind be: + # - unknown, if the [HTML] specification does not assign any + # meaning to attribute’s name. + # - regular, otherwise. # If kind is unknown and if config["allowUnknownMarkup"] does not exist or if config["allowUnknownMarkup"] is false: Return drop. # If kind is regular and attribute’s local name does not match any name in the baseline attribute allow list: Return drop. # If attribute matches any attribute match list in config’s attribute drop list: Return drop.