Fix simple textContent
This commit is contained in:
parent
a14a5ece34
commit
96d6d7ee01
2 changed files with 17 additions and 12 deletions
|
@ -1398,16 +1398,19 @@ class Parser {
|
|||
# replacing any nested <img> elements with their alt attribute, if
|
||||
# present; otherwise their src attribute, if present, adding a
|
||||
# space at the beginning and end, resolving the URL if it’s
|
||||
# relative;
|
||||
foreach ($copy->getElementsByTagName("img") as $e) {
|
||||
$alt = $e->getAttribute("alt");
|
||||
$src = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." ";
|
||||
if ($prefix === "e") {
|
||||
$attr = strlen($alt) ? $alt : $src;
|
||||
} else {
|
||||
$attr = strlen($alt) ? $alt : "";
|
||||
# relative; [p- and e- only]
|
||||
if (in_array($prefix, ["p", "e"])) {
|
||||
foreach ($copy->getElementsByTagName("img") as $e) {
|
||||
$attr = null;
|
||||
if ($e->hasAttribute("alt")) {
|
||||
$attr = $alt = $e->getAttribute("alt");
|
||||
} elseif ($e->hasAttribute("src")) {
|
||||
$attr = " ".($e->hasAttribute("src") ? $this->normalizeUrl($e->getAttribute("src")) : "")." ";
|
||||
}
|
||||
if ($attr !== null) {
|
||||
$e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e);
|
||||
}
|
||||
}
|
||||
$e->parentNode->replaceChild($e->ownerDocument->createTextNode($attr), $e);
|
||||
}
|
||||
# removing all leading/trailing spaces
|
||||
return $this->trim($copy->textContent);
|
||||
|
|
|
@ -36,14 +36,16 @@ class StandardTest extends \PHPUnit\Framework\TestCase {
|
|||
// read expectation data
|
||||
$exp = json_decode(file_get_contents($path.".json"), true);
|
||||
if ($exp) {
|
||||
$pattern = '#(\bhttps?://[^/ "\']+)($|[ "\'])#';
|
||||
$replace = "$1/$2";
|
||||
// fix up expectation where necessary
|
||||
array_walk_recursive($exp, function(&$v) {
|
||||
array_walk_recursive($exp, function(&$v) use ($pattern, $replace) {
|
||||
// URLs differ trivially from output of our normalization library
|
||||
$v = preg_replace('#^https?://[^/]+$#', "$0/", $v);
|
||||
$v = preg_replace($pattern, $replace, $v);
|
||||
});
|
||||
// URLs also need fixing as keys in rel-urls
|
||||
foreach ($exp['rel-urls'] as $k => $v) {
|
||||
$fixed = preg_replace('#^https?://[^/]+$#', "$0/", $k);
|
||||
$fixed = preg_replace($pattern, $replace, $k);
|
||||
$exp['rel-urls'][$fixed] = $v;
|
||||
if ($fixed !== $k) {
|
||||
unset($exp['rel-urls'][$k]);
|
||||
|
|
Loading…
Reference in a new issue