Fix simple textContent

This commit is contained in:
J. King 2023-07-27 12:25:20 -04:00
parent a14a5ece34
commit 96d6d7ee01
2 changed files with 17 additions and 12 deletions

View file

@ -1398,16 +1398,19 @@ class Parser {
# replacing any nested <img> elements with their alt attribute, if
# present; otherwise their src attribute, if present, adding a
# space at the beginning and end, resolving the URL if it's
# relative;
foreach ($copy->getElementsByTagName("img") as $e) {
$alt = $e->getAttribute("alt");
$src = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." ";
if ($prefix === "e") {
$attr = strlen($alt) ? $alt : $src;
} else {
$attr = strlen($alt) ? $alt : "";
# relative; [p- and e- only]
if (in_array($prefix, ["p", "e"])) {
foreach ($copy->getElementsByTagName("img") as $e) {
$attr = null;
if ($e->hasAttribute("alt")) {
$attr = $alt = $e->getAttribute("alt");
} elseif ($e->hasAttribute("src")) {
$attr = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." ";
}
if ($attr !== null) {
$e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e);
}
}
$e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e);
}
# removing all leading/trailing spaces
return $this->trim($copy->textContent);

View file

@ -36,14 +36,16 @@ class StandardTest extends \PHPUnit\Framework\TestCase {
// read expectation data
$exp = json_decode(file_get_contents($path.".json"), true);
if ($exp) {
$pattern = '#(\bhttps?://[^/ "\']+)($|[ "\'])#';
$replace = "$1/$2";
// fix up expectation where necessary
array_walk_recursive($exp, function(&$v) {
array_walk_recursive($exp, function(&$v) use ($pattern, $replace) {
// URLs differ trivially from output of our normalization library
$v = preg_replace('#^https?://[^/]+$#', "$0/", $v);
$v = preg_replace($pattern, $replace, $v);
});
// URLs also need fixing as keys in rel-urls
foreach ($exp['rel-urls'] as $k => $v) {
$fixed = preg_replace('#^https?://[^/]+$#', "$0/", $k);
$fixed = preg_replace($pattern, $replace, $k);
$exp['rel-urls'][$fixed] = $v;
if ($fixed !== $k) {
unset($exp['rel-urls'][$k]);