From 96d6d7ee01320071bfb2f99374700b9eac4ccf80 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Thu, 27 Jul 2023 12:25:20 -0400 Subject: [PATCH] Fix simple textContent --- lib/Microformats/Parser.php | 21 ++++++++++++--------- tests/cases/StandardTest.php | 8 +++++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/Microformats/Parser.php b/lib/Microformats/Parser.php index fb2c926..bf0c7b8 100644 --- a/lib/Microformats/Parser.php +++ b/lib/Microformats/Parser.php @@ -1398,16 +1398,19 @@ class Parser { # replacing any nested elements with their alt attribute, if # present; otherwise their src attribute, if present, adding a # space at the beginning and end, resolving the URL if it’s - # relative; - foreach ($copy->getElementsByTagName("img") as $e) { - $alt = $e->getAttribute("alt"); - $src = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." "; - if ($prefix === "e") { - $attr = strlen($alt) ? $alt : $src; - } else { - $attr = strlen($alt) ? $alt : ""; + # relative; [p- and e- only] + if (in_array($prefix, ["p", "e"])) { + foreach ($copy->getElementsByTagName("img") as $e) { + $attr = null; + if ($e->hasAttribute("alt")) { + $attr = $alt = $e->getAttribute("alt"); + } elseif ($e->hasAttribute("src")) { + $attr = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." "; + } + if ($attr !== null) { + $e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e); + } } - $e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e); } # removing all leading/trailing spaces return $this->trim($copy->textContent); diff --git a/tests/cases/StandardTest.php b/tests/cases/StandardTest.php index 1aca503..6a4a39c 100644 --- a/tests/cases/StandardTest.php +++ b/tests/cases/StandardTest.php @@ -36,14 +36,16 @@ class StandardTest extends \PHPUnit\Framework\TestCase { // read expectation data $exp = json_decode(file_get_contents($path.".json"), true); if ($exp) { + $pattern = '#(\bhttps?://[^/ "\']+)($|[ "\'])#'; + $replace = "$1/$2"; // fix up expectation where necessary - array_walk_recursive($exp, function(&$v) { + array_walk_recursive($exp, function(&$v) use ($pattern, $replace) { // URLs differ trivially from output of our normalization library - $v = preg_replace('#^https?://[^/]+$#', "$0/", $v); + $v = preg_replace($pattern, $replace, $v); }); // URLs also need fixing as keys in rel-urls foreach ($exp['rel-urls'] as $k => $v) { - $fixed = preg_replace('#^https?://[^/]+$#', "$0/", $k); + $fixed = preg_replace($pattern, $replace, $k); $exp['rel-urls'][$fixed] = $v; if ($fixed !== $k) { unset($exp['rel-urls'][$k]);