Browse Source

Move the creation of documents to separate functions

This will make document creation easier to override for e.g. enhanced
DOM implementations.
domparser
J. King 1 year ago
parent
commit
03a42a2343
  1. 27
      lib/DOMParser.php

27
lib/DOMParser.php

@ -50,14 +50,13 @@ XMLDECL;
// parse the string as either HTML or XML
if ($t->isHtml) {
// for HTML we invoke our parser which has its own handling for everything
return Parser::parse($string, $type)->document;
return $this->createDocumentHtml($string, $type);
} elseif ($t->isXml) {
// for XML we have to jump through a few hoops to deal with encoding;
// if we have a known encoding we want to make sure the XML parser
// doesn't try to do its own detection. The only way to do this is
// to convert to UTF-8 where necessary and remove any XML
// declaration encoding information
$doc = new \DOMDocument();
try {
// first check for a byte order mark; if one exists we can go straight to parsing
if (!Encoding::sniffBOM($string)) {
@ -80,6 +79,9 @@ XMLDECL;
// or ISO 2022-JP never contain non-ASCII characters
// before encoding information is seen)
if ($charset === "UTF-16BE" || $charset === "UTF-16LE") {
// NOTE: the transcoding operation may throw an
// exception, which is why this whole operation
// is wrapped in a try block
$decoder = Encoding::createDecoder($charset, $string, true, false);
$string = "";
while (strlen($c = $decoder->nextChar())) {
@ -100,16 +102,25 @@ XMLDECL;
}
}
// parse the document
if (!$doc->loadXML($string, \LIBXML_NONET | \LIBXML_BIGLINES | \LIBXML_COMPACT |\LIBXML_NOWARNING | \LIBXML_NOERROR)) {
throw new \Exception(libxml_get_last_error()->message);
}
return $this->createDocumentXml($string);
} catch (\Exception $e) {
$doc->appendChild($doc->createElementNS("http://www.mozilla.org/newlayout/xml/parsererror.xml", "parserror"));
$doc->documentElement->appendChild($doc->createTextNode($e->getMessage()));
$string = "<parsererror xmlns=\"http://www.mozilla.org/newlayout/xml/parsererror.xml\">".htmlspecialchars($e->getMessage(), \ENT_NOQUOTES | \ENT_SUBSTITUTE | \ENT_XML1, "UTF-8")."</parsererror>";
return $this->createDocumentXml($string);
}
return $doc;
} else {
throw new \InvalidArgumentException("\$type must be \"text/html\" or an XML type");
}
}
protected function createDocumentHtml(string $string, string $type): \DOMDocument {
return Parser::parse($string, $type)->document;
}
protected function createDocumentXml(string $string): \DOMDocument {
$document = new \DOMDocument;
if (!$document->loadXML($string, \LIBXML_NONET | \LIBXML_BIGLINES | \LIBXML_COMPACT |\LIBXML_NOWARNING | \LIBXML_NOERROR)) {
throw new \Exception(libxml_get_last_error()->message);
}
return $document;
}
}
Loading…
Cancel
Save