From 8d4a61adaa3d8dfcc107bb8b7493e69e1d4479f0 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 23 Jun 2023 19:07:29 -0400 Subject: [PATCH] Add some original tests --- lib/Microformats/Parser.php | 19 ++++--- tests/cases/StandardTest.php | 41 ++++++++------- .../mensbeam/default-settings/entry-date.html | 9 ++++ .../mensbeam/default-settings/entry-date.json | 16 ++++++ .../default-settings/implied-prop.html | 31 ++++++++++++ .../default-settings/name-conflict.html | 6 +++ .../default-settings/name-conflict.json | 28 +++++++++++ tests/cases/mensbeam/lang-true/lang.html | 15 ++++++ tests/cases/mensbeam/lang-true/lang.json | 50 +++++++++++++++++++ 9 files changed, 188 insertions(+), 27 deletions(-) create mode 100644 tests/cases/mensbeam/default-settings/entry-date.html create mode 100644 tests/cases/mensbeam/default-settings/entry-date.json create mode 100644 tests/cases/mensbeam/default-settings/implied-prop.html create mode 100644 tests/cases/mensbeam/default-settings/name-conflict.html create mode 100644 tests/cases/mensbeam/default-settings/name-conflict.json create mode 100644 tests/cases/mensbeam/lang-true/lang.html create mode 100644 tests/cases/mensbeam/lang-true/lang.json diff --git a/lib/Microformats/Parser.php b/lib/Microformats/Parser.php index 2c7356e..35c282f 100644 --- a/lib/Microformats/Parser.php +++ b/lib/Microformats/Parser.php @@ -355,10 +355,9 @@ class Parser { * @param \DOMElement $node The element to start searching from, including itself */ protected function getRootCandidates(\DOMElement $node): void { - $query = []; - $query[] = './/*[contains(concat(" ", normalize-space(@class)), " h-")]'; + $query = [".", ".//*[contains(concat(' ', normalize-space(@class)), ' h-')]"]; foreach (array_keys(static::BACKCOMPAT_ROOTS) as $root) { - $query[] = './/*[contains(concat(" ", normalize-space(@class), " "), " '.$root.' ")]'; + $query[] = ".//*[contains(concat(' ', normalize-space(@class), ' '), ' $root ')]"; } $query = implode("|", $query); $this->roots = iterator_to_array($this->xpath->query($query, $node)); @@ -711,7 +710,7 @@ class Parser { # else if .h-x>abbr:only-child[title]:not([title=""]):not[.h-*] then use that abbr title for name $name = $set->item(0)->getAttribute("title"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./img[@alt and @alt != '' and count(../*) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -719,7 +718,7 @@ class Parser { # else if .h-x>:only-child:not[.h-*]>img:only-child[alt]:not([alt=""]):not[.h-*] then use that img’s alt for name $name = $set->item(0)->getAttribute("alt"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./area[@alt and @alt != '' and count(../*) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -727,7 +726,7 @@ class Parser { # else if .h-x>:only-child:not[.h-*]>area:only-child[alt]:not([alt=""]):not[.h-*] then use that area’s alt for name $name = $set->item(0)->getAttribute("alt"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./abbr[@title and @title != '' and count(../*) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -758,7 +757,7 @@ class Parser { # else if .h-x>object[data]:only-of-type:not[.h-*] then use that object’s data for photo $photo = $set->item(0)->getAttribute("data"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./img[@src and count(../img) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -766,7 +765,7 @@ class Parser { # else if .h-x>:only-child:not[.h-*]>img[src]:only-of-type:not[.h-*], then use the result of "parse an img element for src and alt" (see Sec.1.5) for photo $out['properties']['photo'] = [$this->parseImg($set->item(0))]; } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./object[@data and count(../object) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -797,7 +796,7 @@ class Parser { # else if .h-x>area[href]:only-of-type:not[.h-*], then use that [href] for url $url = $set->item(0)->getAttribute("href"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./a[@href and count(../a) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) @@ -805,7 +804,7 @@ class Parser { # else if .h-x>:only-child:not[.h-*]>a[href]:only-of-type:not[.h-*], then use that [href] for url $url = $set->item(0)->getAttribute("href"); } elseif ( - ($set = $this->xpath->query("./*[not(template) and count(../*) = 1]", $root))->length + ($set = $this->xpath->query("./*[local-name() != 'template' and count(../*) = 1]", $root))->length && !$this->hasRoots($set->item(0)) && ($set = $this->xpath->query("./area[@href and count(../area) = 1]", $set->item(0)))->length && !$this->hasRoots($set->item(0)) diff --git a/tests/cases/StandardTest.php b/tests/cases/StandardTest.php index f694ab8..2bf7f4e 100644 --- a/tests/cases/StandardTest.php +++ b/tests/cases/StandardTest.php @@ -29,32 +29,36 @@ class StandardTest extends \PHPUnit\Framework\TestCase { if (isset(self::SUPPRESSED[$name])) { $this->markTestIncomplete(self::SUPPRESSED[$name]); } + // parse input + $act = Microformats::fromFile($path.".html", "text/html; charset=UTF-8", "http://example.com/", $options); // read expectation data $exp = json_decode(file_get_contents($path.".json"), true); - // fix up expectation where necessary - array_walk_recursive($exp, function(&$v) { - // URLs differ trivially from output of our normalization library - $v = preg_replace('#^https?://[^/]+$#', "$0/", $v); - }); - // URLs also need fixing as keys in rel-urls - foreach ($exp['rel-urls'] as $k => $v) { - $fixed = preg_replace('#^https?://[^/]+$#', "$0/", $k); - $exp['rel-urls'][$fixed] = $v; - if ($fixed !== $k) { - unset($exp['rel-urls'][$k]); + if ($exp) { + // fix up expectation where necessary + array_walk_recursive($exp, function(&$v) { + // URLs differ trivially from output of our normalization library + $v = preg_replace('#^https?://[^/]+$#', "$0/", $v); + }); + // URLs also need fixing as keys in rel-urls + foreach ($exp['rel-urls'] as $k => $v) { + $fixed = preg_replace('#^https?://[^/]+$#', "$0/", $k); + $exp['rel-urls'][$fixed] = $v; + if ($fixed !== $k) { + unset($exp['rel-urls'][$k]); + } } + // perform some further monkey-patching on specific tests + $exp = $this->fixTests($exp, $name); + } else { + // if there are no expectations we're probably developing a new test; print the output as JSON + echo Microformats::toJson($act, \JSON_PRETTY_PRINT | \JSON_UNESCAPED_SLASHES | \JSON_UNESCAPED_UNICODE); + exit; } - // perform some further monkey-patching on specific tests - $exp = $this->fixTests($exp, $name); - // parse input - $act = Microformats::fromFile($path.".html", "text/html; charset=UTF-8", "http://example.com/", $options); // sort both arrays $this->ksort($exp); $this->ksort($act); // run comparison if (!$exp) { - echo json_encode($act, \JSON_PRETTY_PRINT | \JSON_UNESCAPED_SLASHES | \JSON_UNESCAPED_UNICODE); - exit; } $this->assertSame($exp, $act); } @@ -64,6 +68,9 @@ class StandardTest extends \PHPUnit\Framework\TestCase { yield from $this->provideTestList([\MensBeam\Microformats\BASE."vendor-bin/phpunit/vendor/mf2/tests/tests/"], ['simpleTrim' => true]); // tests from php-mf2 yield from $this->provideTestList([\MensBeam\Microformats\BASE."tests/cases/third-party/"], []); + // tests from our own corpus + yield from $this->provideTestList([\MensBeam\Microformats\BASE."tests/cases/mensbeam/default-settings/"], []); + yield from $this->provideTestList([\MensBeam\Microformats\BASE."tests/cases/mensbeam/lang-true/"], ['lang' => true]); } protected function provideTestList(array $tests, ?array $options = null): \Generator { diff --git a/tests/cases/mensbeam/default-settings/entry-date.html b/tests/cases/mensbeam/default-settings/entry-date.html new file mode 100644 index 0000000..54094d6 --- /dev/null +++ b/tests/cases/mensbeam/default-settings/entry-date.html @@ -0,0 +1,9 @@ + +
+ + + +
diff --git a/tests/cases/mensbeam/default-settings/entry-date.json b/tests/cases/mensbeam/default-settings/entry-date.json new file mode 100644 index 0000000..01fc615 --- /dev/null +++ b/tests/cases/mensbeam/default-settings/entry-date.json @@ -0,0 +1,16 @@ +{ + "items": [ + { + "type": [ + "h-entry" + ], + "properties": { + "published": [ + "2023-06-23" + ] + } + } + ], + "rels": {}, + "rel-urls": {} +} diff --git a/tests/cases/mensbeam/default-settings/implied-prop.html b/tests/cases/mensbeam/default-settings/implied-prop.html new file mode 100644 index 0000000..94c3c18 --- /dev/null +++ b/tests/cases/mensbeam/default-settings/implied-prop.html @@ -0,0 +1,31 @@ + +
+ + Template be gone! +
+ +
+ + Template be gone! +
+ +
+ + Template be gone! +
+ +
+ + Template be gone! +
diff --git a/tests/cases/mensbeam/default-settings/name-conflict.html b/tests/cases/mensbeam/default-settings/name-conflict.html new file mode 100644 index 0000000..464f8ee --- /dev/null +++ b/tests/cases/mensbeam/default-settings/name-conflict.html @@ -0,0 +1,6 @@ +
+ 2023-06-06T05:32:12-04:00 + 2023-06-06T05:32:12-04:00 + 2023-06-06T05:32:12-04:00 + 2023-06-06T05:32:12-04:00 +
diff --git a/tests/cases/mensbeam/default-settings/name-conflict.json b/tests/cases/mensbeam/default-settings/name-conflict.json new file mode 100644 index 0000000..644bf1c --- /dev/null +++ b/tests/cases/mensbeam/default-settings/name-conflict.json @@ -0,0 +1,28 @@ +{ + "items": [ + { + "type": [ + "h-test" + ], + "properties": { + "no-conflict": [ + "2023-06-06T05:32:12-04:00" + ], + "dt-wins": [ + "2023-06-06 05:32:12-0400" + ], + "u-wins": [ + "http://example.com/2023-06-06T05:32:12-04:00" + ], + "e-wins": [ + { + "html": "2023-06-06T05:32:12-04:00", + "value": "2023-06-06T05:32:12-04:00" + } + ] + } + } + ], + "rels": {}, + "rel-urls": {} +} diff --git a/tests/cases/mensbeam/lang-true/lang.html b/tests/cases/mensbeam/lang-true/lang.html new file mode 100644 index 0000000..9a37add --- /dev/null +++ b/tests/cases/mensbeam/lang-true/lang.html @@ -0,0 +1,15 @@ + + +
+
+
日本語
+
English
+
+ +
+
+
français
+
+
ᐃᓄᒃᑎᑐᑦ
+
+
diff --git a/tests/cases/mensbeam/lang-true/lang.json b/tests/cases/mensbeam/lang-true/lang.json new file mode 100644 index 0000000..749c2da --- /dev/null +++ b/tests/cases/mensbeam/lang-true/lang.json @@ -0,0 +1,50 @@ +{ + "items": [ + { + "type": [ + "h-test" + ], + "properties": {}, + "children": [ + { + "type": [ + "h-test" + ], + "properties": { + "name": [ + "日本語" + ], + "ook": [ + { + "html": "English", + "value": "English", + "lang": "en" + } + ] + }, + "lang": "ja" + }, + { + "type": [ + "h-test" + ], + "properties": { + "ook": [ + { + "html": "français", + "value": "français", + "lang": "fr" + } + ], + "name": [ + "ᐃᓄᒃᑎᑐᑦ" + ] + }, + "lang": "iu" + } + ] + } + ], + "rels": {}, + "rel-urls": {} +}