Browse Source

Various fixes

- No other implementation deduplicates mf2 properties, so we shouldn't
behave differently from the others
- Attribute values shouldn't be trimmed except in implied name parsing
master
J. King 10 months ago
parent
commit
b0de7d2755
  1. 12
      lib/Microformats/Parser.php
  2. 22
      tests/cases/mensbeam/default-settings/name-conflict.html
  3. 28
      tests/cases/mensbeam/default-settings/name-conflict.json

12
lib/Microformats/Parser.php

@ -322,7 +322,7 @@ class Parser {
# "text": the text content of the element if any
foreach (["hreflang", "media", "title", "type"] as $attr) {
if (!isset($out['rel-urls'][$url][$attr]) && $link->hasAttribute($attr)) {
$out['rel-urls'][$url][$attr] = trim($link->getAttribute($attr));
$out['rel-urls'][$url][$attr] = $link->getAttribute($attr);
}
}
if (!isset($out['rel-urls'][$url]['text']) && strlen($text = $this->getCleanText($link, "p"))) {
@ -386,7 +386,7 @@ class Parser {
protected function parseTokens(\DOMElement $node, string $attr): array {
$attr = trim($node->getAttribute($attr), " \r\n\t\f");
if ($attr !== "") {
return array_unique(preg_split("/[ \r\n\t\f]+/sS", $attr));
return preg_split("/[ \r\n\t\f]+/sS", $attr);
} else {
return [];
}
@ -401,7 +401,7 @@ class Parser {
* @param array $classes The array of class names to filter
*/
protected function matchRootsMf2(array $classes): array {
return array_filter($classes, function($c) {
return array_filter(array_unique($classes), function($c) {
# The "*" for root (and property) class names consists of an
# optional vendor prefix (series of 1+ number or lowercase
# a-z characters i.e. [0-9a-z]+, followed by '-'), then one
@ -470,6 +470,8 @@ class Parser {
if (preg_match('/^(p|u|dt|e)((?:-[a-z0-9]+)?(?:-[a-z]+)+)$/S', $c, $match)) {
$prefix = $match[1];
$name = substr($match[2], 1);
/* Other implementations don't perform de-duplication
See https://github.com/microformats/microformats2-parsing/issues/61
if (!isset($out[$name])) {
// property with this name has not been seen yet; add it
$out[$name] = [$prefix, $name];
@ -477,6 +479,8 @@ class Parser {
// property prefix is of a higher rank than one already seen; use the new prefix
$out[$name][0] = $prefix;
}
*/
$out[] = [$prefix, $name];
}
}
return array_values($out);
@ -1085,7 +1089,7 @@ class Parser {
# value: the element's src attribute as a normalized absolute URL
'value' => $this->normalizeUrl($node->getAttribute("src")),
# alt: the element's alt attribute
'alt' => trim($node->getAttribute("alt")),
'alt' => $node->getAttribute("alt"),
];
} else {
# else return the element's src attribute as a normalized absolute URL

22
tests/cases/mensbeam/default-settings/name-conflict.html

@ -1,22 +0,0 @@
<div class="h-test">
<span class="p-no-conflict">
<span class="value">2023-06-06</span>
<span class="value">05:32:12</span>
<span class="value">-04:00</span>
</span>
<span class="p-dt-wins dt-dt-wins">
<span class="value">2023-06-06</span>
<span class="value">05:32:12</span>
<span class="value">-04:00</span>
</span>
<span class="p-u-wins dt-u-wins u-u-wins">
<span class="value">2023-06-06</span>
<span class="value">05:32:12</span>
<span class="value">-04:00</span>
</span>
<span class="p-e-wins dt-e-wins u-e-wins e-e-wins">
<span class="value">2023-06-06</span>
<span class="value">05:32:12</span>
<span class="value">-04:00</span>
</span>
</div>

28
tests/cases/mensbeam/default-settings/name-conflict.json

@ -1,28 +0,0 @@
{
"items": [
{
"type": [
"h-test"
],
"properties": {
"no-conflict": [
"2023-06-0605:32:12-04:00"
],
"dt-wins": [
"2023-06-06 05:32:12-0400"
],
"u-wins": [
"http://example.com/2023-06-0605:32:12-04:00"
],
"e-wins": [
{
"html": "<span class=\"value\">2023-06-06</span>\n <span class=\"value\">05:32:12</span>\n <span class=\"value\">-04:00</span>",
"value": "2023-06-06 05:32:12 -04:00"
}
]
}
}
],
"rels": {},
"rel-urls": {}
}
Loading…
Cancel
Save