From 398180fee61f89ee97bbd2a02787caadd3d334d0 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Wed, 29 Mar 2023 20:04:09 -0400 Subject: [PATCH] Partial implementation of DOMParser --- lib/DOMParser.php | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 lib/DOMParser.php diff --git a/lib/DOMParser.php b/lib/DOMParser.php new file mode 100644 index 0000000..baad496 --- /dev/null +++ b/lib/DOMParser.php @@ -0,0 +1,58 @@ +essence, self::TYPES)) { + throw new \InvalidArgumentException("\$type must be one of ".implode(", ", self::TYPES)); + } + $charset = $t->params['charset'] ?? "UTF-8"; + $encoding = Encoding::matchLabel($charset); + if (!$encoding) { + throw new \InvalidArgumentException("Specified charset is not supported"); + } + $charset = $encoding['name']; + // parse the string as either HTML or XML + if ($t->essence === "text/html") { + // for HTML we invoke our parser + $config = new Parser\Config; + $config->encodingFallback = "UTF-8"; + $config->encodingPrescanBytes = 0; + return Parser::parse($string, $charset, $config); + } else { + // for XML we have to jump through a few hoops to make sure the DOMDocument doesn't make a hash of things, or try to detect encoding + } + } +} \ No newline at end of file