diff --git a/lib/Document.php b/lib/Document.php index c2bb6b7..b5cc450 100644 --- a/lib/Document.php +++ b/lib/Document.php @@ -19,7 +19,7 @@ class Document extends AbstractDocument { // List of elements that are treated as block elements for the purposes of // output formatting when serializing - protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ]; + protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ]; // List of h-elements used when determining extra spacing for the purposes of // output formatting when serializing protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ]; @@ -153,7 +153,7 @@ class Document extends AbstractDocument { $qualifiedName = trim($qualifiedName); try { - return parent::createAttributeNS($namespaceURI, $qualifiedName); + return @parent::createAttributeNS($namespaceURI, $qualifiedName); } catch (\DOMException $e) { // The element name is invalid for XML // Replace any offending characters with "UHHHHHH" where H are the @@ -185,7 +185,7 @@ class Document extends AbstractDocument { if ($className === null) { $e = parent::createElementNS($namespaceURI, $qualifiedName, $value); } else { - $e = new $className($this, $qualifiedName, $value); + $e = new $className($this, $qualifiedName, $value, $namespaceURI ?? ''); } return $e; @@ -207,14 +207,31 @@ class Document extends AbstractDocument { return false; } - public function importNode(\DOMNode $node, bool $deep = false) { - $node = parent::importNode($node, $deep); + protected function importNativeElement(\DOMElement $node, bool $deep = false): \DOMNode { + /*var_export($node->nodeName); + if ($node->nodeName === 'c-') { + die(var_export($node->ownerDocument->saveHTML($node))); + }*/ + if ($node instanceof \DOMElement) { + $copy = $this->createElementNS($node->namespaceURI, $node->nodeName); + foreach ($node->attributes as $attribute) { + $copy->setAttributeNS($attribute->namespaceURI, $attribute->nodeName, $attribute->value); + } - if ($node instanceof Element && !$node instanceof HTMLElement && !$node instanceof SVGElement && !$node instanceof MathMLElement) { - $node = $this->convertElementToSubClass($node); + if ($deep) { + foreach ($node->childNodes as $child) { + $copy->appendChild(($child instanceof \DOMElement) ? $this->importNativeElement($child, true) : $this->importNode($child, true)); + } + } + } else { + $copy = $this->importNode($node, $deep); } - return $node; + return $copy; + } + + public function importNode(\DOMNode $node, bool $deep = false) { + return ($node instanceof \DOMElement) ? $this->importNativeElement($node, $deep) : parent::importNode($node, $deep); } public function load($filename, $options = null, ?string $encoding = null): bool { @@ -249,44 +266,6 @@ class Document extends AbstractDocument { } } - /*while (true) { - $elements = $this->walk(function($n) { - if ($n instanceof Element && !$n instanceof HTMLElement && !$n instanceof SVGElement && !$n instanceof MathMLElement && $n->nodeName !== 'template' && $this->qualifiedNameToClassName($n->nodeName, $n->namespaceURI) !== null) { - return true; - } - }); - - echo (memory_get_peak_usage() / 1024 / 1024) . "\n"; - - $element = $elements->current(); - if ($element !== null) { - $element->parentNode->replaceChild($this->convertElementToSubClass($element), $element); - continue; - } - - break; - } - - while (true) { - // Do templates last so any child elements can be converted to their appropriate - // sub classes before the template is converted. - $elements = $this->walk(function($n) { - if ($n instanceof Element && !$n instanceof HTMLElement && !$n instanceof SVGElement && !$n instanceof MathMLElement && $n->namespaceURI === null && $n->nodeName === 'template') { - return true; - } - }); - - echo (memory_get_peak_usage() / 1024 / 1024) . "\n"; - - $element = $elements->current(); - if ($element !== null) { - $element->parentNode->replaceChild($this->convertElementToSubClass($element), $element); - continue; - } - - break; - }*/ - return true; } @@ -812,24 +791,19 @@ class Document extends AbstractDocument { private function convertElementToSubClass(\DOMElement $element): \DOMElement { - $className = $this->qualifiedNameToClassName($element->nodeName, $element->namespaceURI); - if ($className !== null) { - $newElement = $this->createElement($element->nodeName); - - while ($element->attributes->length > 0) { - $newElement->setAttributeNode($element->attributes->item(0)); - } + $newElement = $this->createElement($element->nodeName); - $target = (!$newElement instanceof HTMLTemplateElement) ? $newElement : $newElement->content; + while ($element->attributes->length > 0) { + $newElement->setAttributeNode($element->attributes->item(0)); + } - while ($element->hasChildNodes()) { - $target->appendChild($element->firstChild); - } + $target = (!$newElement instanceof HTMLTemplateElement) ? $newElement : $newElement->content; - $element = $newElement; + while ($element->hasChildNodes()) { + $target->appendChild($element->firstChild); } - return $element; + return $newElement; } private function qualifiedNameToClassName(string $qualifiedName, ?string $namespaceURI = null): ?string { @@ -837,6 +811,38 @@ class Document extends AbstractDocument { switch ($qualifiedName) { case 'a': $className = 'HTMLAnchorElement'; break; + case 'abbr': + case 'address': + case 'article': + case 'aside': + case 'b': + case 'bdi': + case 'bdo': + case 'cite': + case 'dd': + case 'dfn': + case 'dt': + case 'footer': + case 'header': + case 'hgroup': + case 'i': + case 'kbd': + case 'main': + case 'mark': + case 'rp': + case 'rt': + case 'ruby': + case 's': + case 'samp': + case 'section': + case 'small': + case 'strong': + case 'sub': + case 'sup': + case 'u': + case 'var': + case 'wbr': $className = 'HTMLElement'; + break; case 'area': $className = 'HTMLAreaElement'; break; case 'audio': $className = 'HTMLAudioElement'; @@ -851,14 +857,32 @@ class Document extends AbstractDocument { break; case 'canvas': $className = 'HTMLCanvasElement'; break; + case 'caption': $className = 'HTMLTableCaptionElement'; + break; case 'data': $className = 'HTMLDataElement'; break; case 'datalist': $className = 'HTMLDataListElement'; break; + case 'del': $className = 'HTMLModElement'; + break; case 'details': $className = 'HTMLDetailsElement'; break; case 'dialog': $className = 'HTMLDetailsElement'; break; + case 'div': $className = 'HTMLDivElement'; + break; + case 'dl': $className = 'HTMLDListElement'; + break; + case 'embed': $className = 'HTMLEmbedElement'; + break; + case 'fieldset': $className = 'HTMLFieldsetElement'; + break; + case 'font': $className = 'HTMLFontElement'; + break; + case 'form': $className = 'HTMLFormElement'; + break; + case 'frameset': $className = 'HTMLFramesetElement'; + break; case 'head': $className = 'HTMLHeadElement'; break; case 'h1': @@ -868,13 +892,87 @@ class Document extends AbstractDocument { case 'h5': case 'h6': $className = 'HTMLHeadingElement'; break; + case 'hr': $className = 'HTMLHRElement'; + break; case 'html': $className = 'HTMLHtmlElement'; break; + case 'iframe': $className = 'HTMLIFrameElement'; + break; + case 'input': $className = 'HTMLInputElement'; + break; + case 'ins': $className = 'HTMLModElement'; + break; + case 'label': $className = 'HTMLLabelElement'; + break; + case 'legend': $className = 'HTMLLegendElement'; + break; + case 'li': $className = 'HTMLLIElement'; + break; + case 'link': $className = 'HTMLLinkElement'; + break; + case 'map': $className = 'HTMLMapElement'; + break; + case 'media': $className = 'HTMLMediaElement'; + break; + case 'menu': $className = 'HTMLMenuElement'; + break; + case 'meta': $className = 'HTMLMetaElement'; + break; + case 'object': $className = 'HTMLObjectElement'; + break; + case 'ol': $className = 'HTMLOListElement'; + break; + case 'optgroup': $className = 'HTMLOptGroupElement'; + break; + case 'option': $className = 'HTMLOptionElement'; + break; + case 'output': $className = 'HTMLOutputElement'; + break; case 'p': $className = 'HTMLParagraphElement'; break; + case 'param': $className = 'HTMLParamElement'; + break; + case 'picture': $className = 'HTMLPictureElement'; + break; + case 'pre': $className = 'HTMLPreElement'; + break; + case 'progress': $className = 'HTMLProgressElement'; + break; + case 'q': $className = 'HTMLQuoteElement'; + break; + case 'script': $className = 'HTMLScriptElement'; + break; + case 'select': $className = 'HTMLSelectElement'; + break; + case 'slot': $className = 'HTMLSlotElement'; + break; + case 'source': $className = 'HTMLSourceElement'; + break; + case 'span': $className = 'HTMLSpanElement'; + break; + case 'style': $className = 'HTMLStyleElement'; + break; + case 'table': $className = 'HTMLTableElement'; + break; + case 'tbody': + case 'tfoot': + case 'thead': $className = 'HTMLTableSectionElement'; + break; + case 'tr': $className = 'HTMLTableRowElement'; + break; case 'template': $className = 'HTMLTemplateElement'; break; - default: return null; + case 'textarea': $className = 'HTMLTextAreaElement'; + break; + case 'time': $className = 'HTMLTimeElement'; + break; + case 'track': $className = 'HTMLTrackElement'; + break; + case 'ul': $className = 'HTMLUListElement'; + break; + case 'video': $className = 'HTMLVideoElement'; + break; + default: $className = 'HTMLUnknownElement'; } } /*elseif ($namespaceURI === Parser::SVG_NAMESPACE) { return null; diff --git a/lib/Element.php b/lib/Element.php index 153f923..b8d0d46 100644 --- a/lib/Element.php +++ b/lib/Element.php @@ -5,6 +5,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; +use MensBeam\HTML\Parser; class Element extends \DOMElement { use ContainerNode, DocumentOrElement, EscapeString, MagicProperties, Moonwalk, MoonwalkShallow, ParentNode, ToString, Walk, WalkShallow; diff --git a/lib/HTMLElement.php b/lib/HTMLElement.php index 949ff61..715dfb7 100644 --- a/lib/HTMLElement.php +++ b/lib/HTMLElement.php @@ -6,7 +6,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; -abstract class HTMLElement extends Element { +class HTMLElement extends Element { use HTMLOrForeignElement; protected function __get_accessKey(): string { diff --git a/lib/HTMLMenuElement.php b/lib/HTMLMenuElement.php new file mode 100644 index 0000000..2e799f5 --- /dev/null +++ b/lib/HTMLMenuElement.php @@ -0,0 +1,10 @@ +content; - }z + } if ($node->hasChildNodes()) { $node->walkMap($callback);