|
|
@ -19,7 +19,7 @@ class Document extends AbstractDocument { |
|
|
|
|
|
|
|
// List of elements that are treated as block elements for the purposes of
// output formatting when serializing
protected const BLOCK_ELEMENTS = [
    'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details',
    'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure',
    'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'head', 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'link', 'main',
    'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source',
    'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr',
    'ul'
];

// List of h-elements used when determining extra spacing for the purposes of
// output formatting when serializing
protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ];
|
|
@ -153,7 +153,7 @@ class Document extends AbstractDocument { |
|
|
|
$qualifiedName = trim($qualifiedName); |
|
|
|
|
|
|
|
try { |
|
|
|
return parent::createAttributeNS($namespaceURI, $qualifiedName); |
|
|
|
return @parent::createAttributeNS($namespaceURI, $qualifiedName); |
|
|
|
} catch (\DOMException $e) { |
|
|
|
// The element name is invalid for XML |
|
|
|
// Replace any offending characters with "UHHHHHH" where H are the |
|
|
@ -185,7 +185,7 @@ class Document extends AbstractDocument { |
|
|
|
if ($className === null) { |
|
|
|
$e = parent::createElementNS($namespaceURI, $qualifiedName, $value); |
|
|
|
} else { |
|
|
|
$e = new $className($this, $qualifiedName, $value); |
|
|
|
$e = new $className($this, $qualifiedName, $value, $namespaceURI ?? ''); |
|
|
|
} |
|
|
|
|
|
|
|
return $e; |
|
|
@ -207,14 +207,31 @@ class Document extends AbstractDocument { |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Builds a copy of a DOM element inside this document.
 *
 * The copy is produced with this document's own createElementNS() and the
 * source element's attributes are re-applied one by one with
 * setAttributeNS(), preserving each attribute's namespace, name and value.
 *
 * @param \DOMElement $node The element to copy
 * @param bool $deep When true the element's child nodes are copied too
 * @return \DOMNode The newly created copy
 */
protected function importNativeElement(\DOMElement $node, bool $deep = false): \DOMNode {
    $copy = $this->createElementNS($node->namespaceURI, $node->nodeName);

    foreach ($node->attributes as $attribute) {
        $copy->setAttributeNS($attribute->namespaceURI, $attribute->nodeName, $attribute->value);
    }

    if ($deep) {
        foreach ($node->childNodes as $child) {
            // Element children are rebuilt recursively; every other kind of
            // child goes through importNode(), which defers to the parent
            // implementation for non-element nodes.
            $copy->appendChild(
                ($child instanceof \DOMElement)
                    ? $this->importNativeElement($child, true)
                    : $this->importNode($child, true)
            );
        }
    }

    return $copy;
}

/**
 * Imports a node into this document.
 *
 * Elements are rebuilt through importNativeElement(); any other node type is
 * passed to the parent implementation untouched.
 *
 * @param \DOMNode $node The node to import
 * @param bool $deep When true the node's descendants are imported too
 * @return mixed The imported copy; for non-element nodes this is whatever
 *               parent::importNode() returns
 */
public function importNode(\DOMNode $node, bool $deep = false) {
    if ($node instanceof \DOMElement) {
        return $this->importNativeElement($node, $deep);
    }

    return parent::importNode($node, $deep);
}
|
|
|
|
|
|
|
public function load($filename, $options = null, ?string $encoding = null): bool { |
|
|
@ -249,44 +266,6 @@ class Document extends AbstractDocument { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/*while (true) { |
|
|
|
$elements = $this->walk(function($n) { |
|
|
|
if ($n instanceof Element && !$n instanceof HTMLElement && !$n instanceof SVGElement && !$n instanceof MathMLElement && $n->nodeName !== 'template' && $this->qualifiedNameToClassName($n->nodeName, $n->namespaceURI) !== null) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
echo (memory_get_peak_usage() / 1024 / 1024) . "\n"; |
|
|
|
|
|
|
|
$element = $elements->current(); |
|
|
|
if ($element !== null) { |
|
|
|
$element->parentNode->replaceChild($this->convertElementToSubClass($element), $element); |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
while (true) { |
|
|
|
// Do templates last so any child elements can be converted to their appropriate |
|
|
|
// sub classes before the template is converted. |
|
|
|
$elements = $this->walk(function($n) { |
|
|
|
if ($n instanceof Element && !$n instanceof HTMLElement && !$n instanceof SVGElement && !$n instanceof MathMLElement && $n->namespaceURI === null && $n->nodeName === 'template') { |
|
|
|
return true; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
echo (memory_get_peak_usage() / 1024 / 1024) . "\n"; |
|
|
|
|
|
|
|
$element = $elements->current(); |
|
|
|
if ($element !== null) { |
|
|
|
$element->parentNode->replaceChild($this->convertElementToSubClass($element), $element); |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
}*/ |
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
@ -812,24 +791,19 @@ class Document extends AbstractDocument { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rebuilds an element as a new element created by this document, moving the
 * original's attributes and child nodes onto it.
 *
 * The new element comes from createElement() using the original's node name.
 * When the new element is an HTMLTemplateElement the children are moved into
 * its `content` property instead of the element itself.
 *
 * @param \DOMElement $element The element to rebuild; it is left without
 *                             attributes or children afterwards
 * @return \DOMElement The replacement element
 */
private function convertElementToSubClass(\DOMElement $element): \DOMElement {
    $newElement = $this->createElement($element->nodeName);

    // The loop relies on setAttributeNode() detaching the attribute from
    // $element, so the source attribute list shrinks on every pass.
    while ($element->attributes->length > 0) {
        $newElement->setAttributeNode($element->attributes->item(0));
    }

    // Template children belong in the template's content, not the element.
    $target = (!$newElement instanceof HTMLTemplateElement) ? $newElement : $newElement->content;

    // appendChild() moves the node, so firstChild advances on its own.
    while ($element->hasChildNodes()) {
        $target->appendChild($element->firstChild);
    }

    return $newElement;
}
|
|
|
|
|
|
|
private function qualifiedNameToClassName(string $qualifiedName, ?string $namespaceURI = null): ?string { |
|
|
@ -837,6 +811,38 @@ class Document extends AbstractDocument { |
|
|
|
switch ($qualifiedName) { |
|
|
|
case 'a': $className = 'HTMLAnchorElement'; |
|
|
|
break; |
|
|
|
case 'abbr': |
|
|
|
case 'address': |
|
|
|
case 'article': |
|
|
|
case 'aside': |
|
|
|
case 'b': |
|
|
|
case 'bdi': |
|
|
|
case 'bdo': |
|
|
|
case 'cite': |
|
|
|
case 'dd': |
|
|
|
case 'dfn': |
|
|
|
case 'dt': |
|
|
|
case 'footer': |
|
|
|
case 'header': |
|
|
|
case 'hgroup': |
|
|
|
case 'i': |
|
|
|
case 'kbd': |
|
|
|
case 'main': |
|
|
|
case 'mark': |
|
|
|
case 'rp': |
|
|
|
case 'rt': |
|
|
|
case 'ruby': |
|
|
|
case 's': |
|
|
|
case 'samp': |
|
|
|
case 'section': |
|
|
|
case 'small': |
|
|
|
case 'strong': |
|
|
|
case 'sub': |
|
|
|
case 'sup': |
|
|
|
case 'u': |
|
|
|
case 'var': |
|
|
|
case 'wbr': $className = 'HTMLElement'; |
|
|
|
break; |
|
|
|
case 'area': $className = 'HTMLAreaElement'; |
|
|
|
break; |
|
|
|
case 'audio': $className = 'HTMLAudioElement'; |
|
|
@ -851,14 +857,32 @@ class Document extends AbstractDocument { |
|
|
|
break; |
|
|
|
case 'canvas': $className = 'HTMLCanvasElement'; |
|
|
|
break; |
|
|
|
case 'caption': $className = 'HTMLTableCaptionElement'; |
|
|
|
break; |
|
|
|
case 'data': $className = 'HTMLDataElement'; |
|
|
|
break; |
|
|
|
case 'datalist': $className = 'HTMLDataListElement'; |
|
|
|
break; |
|
|
|
case 'del': $className = 'HTMLModElement'; |
|
|
|
break; |
|
|
|
case 'details': $className = 'HTMLDetailsElement'; |
|
|
|
break; |
|
|
|
case 'dialog': $className = 'HTMLDetailsElement'; |
|
|
|
break; |
|
|
|
case 'div': $className = 'HTMLDivElement'; |
|
|
|
break; |
|
|
|
case 'dl': $className = 'HTMLDListElement'; |
|
|
|
break; |
|
|
|
case 'embed': $className = 'HTMLEmbedElement'; |
|
|
|
break; |
|
|
|
case 'fieldset': $className = 'HTMLFieldsetElement'; |
|
|
|
break; |
|
|
|
case 'font': $className = 'HTMLFontElement'; |
|
|
|
break; |
|
|
|
case 'form': $className = 'HTMLFormElement'; |
|
|
|
break; |
|
|
|
case 'frameset': $className = 'HTMLFramesetElement'; |
|
|
|
break; |
|
|
|
case 'head': $className = 'HTMLHeadElement'; |
|
|
|
break; |
|
|
|
case 'h1': |
|
|
@ -868,13 +892,87 @@ class Document extends AbstractDocument { |
|
|
|
case 'h5': |
|
|
|
case 'h6': $className = 'HTMLHeadingElement'; |
|
|
|
break; |
|
|
|
case 'hr': $className = 'HTMLHRElement'; |
|
|
|
break; |
|
|
|
case 'html': $className = 'HTMLHtmlElement'; |
|
|
|
break; |
|
|
|
case 'iframe': $className = 'HTMLIFrameElement'; |
|
|
|
break; |
|
|
|
case 'input': $className = 'HTMLInputElement'; |
|
|
|
break; |
|
|
|
case 'ins': $className = 'HTMLModElement'; |
|
|
|
break; |
|
|
|
case 'label': $className = 'HTMLLabelElement'; |
|
|
|
break; |
|
|
|
case 'legend': $className = 'HTMLLegendElement'; |
|
|
|
break; |
|
|
|
case 'li': $className = 'HTMLLIElement'; |
|
|
|
break; |
|
|
|
case 'link': $className = 'HTMLLinkElement'; |
|
|
|
break; |
|
|
|
case 'map': $className = 'HTMLMapElement'; |
|
|
|
break; |
|
|
|
case 'media': $className = 'HTMLMediaElement'; |
|
|
|
break; |
|
|
|
case 'menu': $className = 'HTMLMenuElement'; |
|
|
|
break; |
|
|
|
case 'meta': $className = 'HTMLMetaElement'; |
|
|
|
break; |
|
|
|
case 'object': $className = 'HTMLObjectElement'; |
|
|
|
break; |
|
|
|
case 'ol': $className = 'HTMLOListElement'; |
|
|
|
break; |
|
|
|
case 'optgroup': $className = 'HTMLOptGroupElement'; |
|
|
|
break; |
|
|
|
case 'option': $className = 'HTMLOptionElement'; |
|
|
|
break; |
|
|
|
case 'output': $className = 'HTMLOutputElement'; |
|
|
|
break; |
|
|
|
case 'p': $className = 'HTMLParagraphElement'; |
|
|
|
break; |
|
|
|
case 'param': $className = 'HTMLParamElement'; |
|
|
|
break; |
|
|
|
case 'picture': $className = 'HTMLPictureElement'; |
|
|
|
break; |
|
|
|
case 'pre': $className = 'HTMLPreElement'; |
|
|
|
break; |
|
|
|
case 'progress': $className = 'HTMLProgressElement'; |
|
|
|
break; |
|
|
|
case 'q': $className = 'HTMLQuoteElement'; |
|
|
|
break; |
|
|
|
case 'script': $className = 'HTMLScriptElement'; |
|
|
|
break; |
|
|
|
case 'select': $className = 'HTMLSelectElement'; |
|
|
|
break; |
|
|
|
case 'slot': $className = 'HTMLSlotElement'; |
|
|
|
break; |
|
|
|
case 'source': $className = 'HTMLSourceElement'; |
|
|
|
break; |
|
|
|
case 'span': $className = 'HTMLSpanElement'; |
|
|
|
break; |
|
|
|
case 'style': $className = 'HTMLStyleElement'; |
|
|
|
break; |
|
|
|
case 'table': $className = 'HTMLTableElement'; |
|
|
|
break; |
|
|
|
case 'tbody': |
|
|
|
case 'tfoot': |
|
|
|
case 'thead': $className = 'HTMLTableSectionElement'; |
|
|
|
break; |
|
|
|
case 'tr': $className = 'HTMLTableRowElement'; |
|
|
|
break; |
|
|
|
case 'template': $className = 'HTMLTemplateElement'; |
|
|
|
break; |
|
|
|
default: return null; |
|
|
|
case 'textarea': $className = 'HTMLTextAreaElement'; |
|
|
|
break; |
|
|
|
case 'time': $className = 'HTMLTimeElement'; |
|
|
|
break; |
|
|
|
case 'track': $className = 'HTMLTrackElement'; |
|
|
|
break; |
|
|
|
case 'ul': $className = 'HTMLUListElement'; |
|
|
|
break; |
|
|
|
case 'video': $className = 'HTMLVideoElement'; |
|
|
|
break; |
|
|
|
default: $className = 'HTMLUnknownElement'; |
|
|
|
} |
|
|
|
} /*elseif ($namespaceURI === Parser::SVG_NAMESPACE) { |
|
|
|
return null; |
|
|
|