From 8efbbf0907f6c7095dda6ec710555ca44ddda6bb Mon Sep 17 00:00:00 2001 From: "J. King" Date: Wed, 21 Jun 2023 17:43:07 -0400 Subject: [PATCH] Fix more bugs URL normalization still needs to be sorted out --- lib/Parser.php | 7 +- tests/cases/StandardTest.php | 1 + .../phpmf2/classic/hentry-tag.html | 29 +++++ .../phpmf2/classic/hentry-tag.json | 115 ++++++++++++++++++ .../phpmf2/classic/hentry-tag2.html | 5 + .../phpmf2/classic/hentry-tag2.json | 33 +++++ .../phpmf2/classic/hreview-tag.html | 32 +++++ .../phpmf2/classic/hreview-tag.json | 103 ++++++++++++++++ 8 files changed, 322 insertions(+), 3 deletions(-) create mode 100644 tests/cases/third-party/phpmf2/classic/hentry-tag.html create mode 100644 tests/cases/third-party/phpmf2/classic/hentry-tag.json create mode 100644 tests/cases/third-party/phpmf2/classic/hentry-tag2.html create mode 100644 tests/cases/third-party/phpmf2/classic/hentry-tag2.json create mode 100644 tests/cases/third-party/phpmf2/classic/hreview-tag.html create mode 100644 tests/cases/third-party/phpmf2/classic/hreview-tag.json diff --git a/lib/Parser.php b/lib/Parser.php index 667a827..eec0154 100644 --- a/lib/Parser.php +++ b/lib/Parser.php @@ -96,7 +96,7 @@ class Parser { 'rating' => ['h-review' => ["p", "rating"], 'h-review-aggregate' => ["p", "rating"]], 'region' => ['h-adr' => ["p", "region"], 'h-card' => ["p", "region"]], 'rev' => ['h-card' => ["dt", "rev"]], - 'reviewer' => ['h-review' => ["p", "author"]], + 'reviewer' => ['h-review' => ["p", "author", ["vcard"]]], 'review' => ['h-product' => ["p", "review", ["hreview"]]], // also requires special processing 'role' => ['h-card' => ["p", "role"]], 'skill' => ['h-resume' => ["p", "skill"]], @@ -175,7 +175,7 @@ class Parser { 'h:ia' => self::DATE_TYPE_MIN, # HHam HHpm 'ha' => self::DATE_TYPE_HOUR, - // 12-hour clock without hour's leading zero; this is not part of the spec, but probably occurs + // 12-hour clock without hour's leading zero; these are not part of the spec, but definitely occur 'g:i:sa' => self::DATE_TYPE_SEC, 'g:ia' => self::DATE_TYPE_MIN, 'ga' => self::DATE_TYPE_HOUR, @@ -189,6 +189,7 @@ class Parser { // Hour-only time zones require special processing # Z '\Z' => self::DATE_TYPE_ZULU, + // Lowercase z is used in tests '\z' => self::DATE_TYPE_ZULU, ]; protected const DATE_OUTPUT_FORMATS = [ @@ -715,7 +716,7 @@ class Parser { // https://microformats.org/wiki/rel-tag#Abstract // we are required to retrieve the last component of the URL path and use that if (preg_match('#([^/]*)/?$#', URL::fromString($this->normalizeUrl($node->getAttribute("href")))->getPath(), $match)) { - return $match[1]; + return urldecode($match[1]); } return ""; } diff --git a/tests/cases/StandardTest.php b/tests/cases/StandardTest.php index b081da5..b1e2e60 100644 --- a/tests/cases/StandardTest.php +++ b/tests/cases/StandardTest.php @@ -115,6 +115,7 @@ class StandardTest extends \PHPUnit\Framework\TestCase { break; case "phpmf2/classic/fberriman": case "phpmf2/classic/mixedroots2": + case "phpmf2/classic/hentry-tag": $this->fixDates($exp['items'][0]['properties']['published']); break; diff --git a/tests/cases/third-party/phpmf2/classic/hentry-tag.html b/tests/cases/third-party/phpmf2/classic/hentry-tag.html new file mode 100644 index 0000000..36fbcb0 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hentry-tag.html @@ -0,0 +1,29 @@ + +
+
+ +

IndieWeb generation 4 and hosted domains

+
+ +
+

Naturally because of the goals of Micro.blog, I see a lot of discussion about “owning your content”. It’s an important part of the mission for Micro.blog to take control back from closed, ad-supported social networks and instead embrace posting on our own blogs again.

+

But what does it mean to own our content? Do we have to install WordPress or some home-grown blogging system for it to be considered true content ownership, where we have the source code and direct SFTP access to the server? No. If that’s our definition, then content ownership will be permanently reserved for programmers and technical folks who have hours to spend on server configuration.

+

IndieWebCamp has a generations chart to illustrate the path from early adopters to mainstream users. Eli Mellen highlighted it in a recent post about the need to bridge the gap between the technical aspects of IndieWeb tools and more approachable platforms. With Micro.blog specifically, the goal is “generation 4”, and I think we’re on track to get there.

+

I want blogging to be as easy as tweeting. Anything short of that isn’t good enough for Micro.blog. You’ll notice when you use Twitter that they never ask you to SFTP into twitter.com to configure your account. They don’t ask you to install anything.

+

More powerful software that you can endlessly customize will always have its place. It’s good to have a range of options, including open source to tinker with. That’s often where some of the best ideas start. But too often I see people get lost in the weeds of plugins and themes, lured in by the myth that you have to self-host with WordPress to be part of the IndieWeb.

+

Owning your content isn’t about portable software. It’s about portable URLs and data. It’s about domain names.

+

When you write and post photos at your own domain name, your content can outlive any one blogging platform. This month marked the 16th anniversary of blogging at manton.org, and in that time I’ve switched blogging platforms and hosting providers a few times. The posts and URLs can all be preserved through those changes because it’s my own domain name.

+

I was disappointed when Medium announced they were discontinuing support for custom domain names. I’m linking to the Internet Archive copy because Medium’s help page about this is no longer available. If “no custom domains” is still their policy, it’s a setback for the open web, and dooms Medium to the same dead-end as twitter.com/username URLs.

+

If you can’t use your own domain name, you can’t own it. Your content will be forever stuck at those silo URLs, beholden to the whims of the algorithmic timeline and shifting priorities of the executive team.

+

For hosted blogs on Micro.blog, we encourage everyone to map a custom domain to their content, and we throw in free SSL and preserve redirects for old posts on imported WordPress content. There’s more we can do.

+

I’m working on the next version of the macOS app for Micro.blog now, which features multiple accounts and even multiple blogs under the same account. Here’s a screenshot of the settings screen:

+

Mac screenshot

+

The goal with Micro.blog is not to be a stop-gap hosting provider, with truly “serious” users eventually moving on to something else (although we make that easy). We want Micro.blog hosting to be the best platform for owning your content and participating in the Micro.blog and IndieWeb communities.

+
+ + +
\ No newline at end of file diff --git a/tests/cases/third-party/phpmf2/classic/hentry-tag.json b/tests/cases/third-party/phpmf2/classic/hentry-tag.json new file mode 100644 index 0000000..c3f31c8 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hentry-tag.json @@ -0,0 +1,115 @@ +{ + "items": [ + { + "type": [ + "h-entry" + ], + "properties": { + "name": [ + "IndieWeb generation 4 and hosted domains" + ], + "category": [ + "technology", + "domains", + "indiewebcamp", + "microblog", + "wordpress" + ], + "url": [ + "http://www.manton.org/2018/03/indieweb-generation-4-and-hosted-domains.html" + ], + "content": [ + { + "html": "

Naturally because of the goals of Micro.blog, I see a lot of discussion about \u201cowning your content\u201d. It\u2019s an important part of the mission for Micro.blog to take control back from closed, ad-supported social networks and instead embrace posting on our own blogs again.

\n

But what does it mean to own our content? Do we have to install WordPress or some home-grown blogging system for it to be considered true content ownership, where we have the source code and direct SFTP access to the server? No. If that\u2019s our definition, then content ownership will be permanently reserved for programmers and technical folks who have hours to spend on server configuration.

\n

IndieWebCamp has a generations chart to illustrate the path from early adopters to mainstream users. Eli Mellen highlighted it in a recent post about the need to bridge the gap between the technical aspects of IndieWeb tools and more approachable platforms. With Micro.blog specifically, the goal is \u201cgeneration 4\u201d, and I think we\u2019re on track to get there.

\n

I want blogging to be as easy as tweeting. Anything short of that isn\u2019t good enough for Micro.blog. You\u2019ll notice when you use Twitter that they never ask you to SFTP into twitter.com to configure your account. They don\u2019t ask you to install anything.

\n

More powerful software that you can endlessly customize will always have its place. It\u2019s good to have a range of options, including open source to tinker with. That\u2019s often where some of the best ideas start. But too often I see people get lost in the weeds of plugins and themes, lured in by the myth that you have to self-host with WordPress to be part of the IndieWeb.

\n

Owning your content isn\u2019t about portable software. It\u2019s about portable URLs and data. It\u2019s about domain names.

\n

When you write and post photos at your own domain name, your content can outlive any one blogging platform. This month marked the 16th anniversary of blogging at manton.org, and in that time I\u2019ve switched blogging platforms and hosting providers a few times. The posts and URLs can all be preserved through those changes because it\u2019s my own domain name.

\n

I was disappointed when Medium announced they were discontinuing support for custom domain names. I\u2019m linking to the Internet Archive copy because Medium\u2019s help page about this is no longer available. If \u201cno custom domains\u201d is still their policy, it\u2019s a setback for the open web, and dooms Medium to the same dead-end as twitter.com/username URLs.

\n

If you can\u2019t use your own domain name, you can\u2019t own it. Your content will be forever stuck at those silo URLs, beholden to the whims of the algorithmic timeline and shifting priorities of the executive team.

\n

For hosted blogs on Micro.blog, we encourage everyone to map a custom domain to their content, and we throw in free SSL and preserve redirects for old posts on imported WordPress content. There\u2019s more we can do.

\n

I\u2019m working on the next version of the macOS app for Micro.blog now, which features multiple accounts and even multiple blogs under the same account. Here\u2019s a screenshot of the settings screen:

\n

\"Mac

\n

The goal with Micro.blog is not to be a stop-gap hosting provider, with truly \u201cserious\u201d users eventually moving on to something else (although we make that easy). We want Micro.blog hosting to be the best platform for owning your content and participating in the Micro.blog and IndieWeb communities.

", + "value": "Naturally because of the goals of Micro.blog, I see a lot of discussion about \u201cowning your content\u201d. It\u2019s an important part of the mission for Micro.blog to take control back from closed, ad-supported social networks and instead embrace posting on our own blogs again.\nBut what does it mean to own our content? Do we have to install WordPress or some home-grown blogging system for it to be considered true content ownership, where we have the source code and direct SFTP access to the server? No. If that\u2019s our definition, then content ownership will be permanently reserved for programmers and technical folks who have hours to spend on server configuration.\nIndieWebCamp has a generations chart to illustrate the path from early adopters to mainstream users. Eli Mellen highlighted it in a recent post about the need to bridge the gap between the technical aspects of IndieWeb tools and more approachable platforms. With Micro.blog specifically, the goal is \u201cgeneration 4\u201d, and I think we\u2019re on track to get there.\nI want blogging to be as easy as tweeting. Anything short of that isn\u2019t good enough for Micro.blog. You\u2019ll notice when you use Twitter that they never ask you to SFTP into twitter.com to configure your account. They don\u2019t ask you to install anything.\nMore powerful software that you can endlessly customize will always have its place. It\u2019s good to have a range of options, including open source to tinker with. That\u2019s often where some of the best ideas start. But too often I see people get lost in the weeds of plugins and themes, lured in by the myth that you have to self-host with WordPress to be part of the IndieWeb.\nOwning your content isn\u2019t about portable software. It\u2019s about portable URLs and data. It\u2019s about domain names.\nWhen you write and post photos at your own domain name, your content can outlive any one blogging platform. This month marked the 16th anniversary of blogging at manton.org, and in that time I\u2019ve switched blogging platforms and hosting providers a few times. The posts and URLs can all be preserved through those changes because it\u2019s my own domain name.\nI was disappointed when Medium announced they were discontinuing support for custom domain names. I\u2019m linking to the Internet Archive copy because Medium\u2019s help page about this is no longer available. If \u201cno custom domains\u201d is still their policy, it\u2019s a setback for the open web, and dooms Medium to the same dead-end as twitter.com/username URLs.\nIf you can\u2019t use your own domain name, you can\u2019t own it. Your content will be forever stuck at those silo URLs, beholden to the whims of the algorithmic timeline and shifting priorities of the executive team.\nFor hosted blogs on Micro.blog, we encourage everyone to map a custom domain to their content, and we throw in free SSL and preserve redirects for old posts on imported WordPress content. There\u2019s more we can do.\nI\u2019m working on the next version of the macOS app for Micro.blog now, which features multiple accounts and even multiple blogs under the same account. Here\u2019s a screenshot of the settings screen:\nMac screenshot\nThe goal with Micro.blog is not to be a stop-gap hosting provider, with truly \u201cserious\u201d users eventually moving on to something else (although we make that easy). We want Micro.blog hosting to be the best platform for owning your content and participating in the Micro.blog and IndieWeb communities." + } + ], + "published": [ + "2018-03-23T09:24:36+00:00" + ], + "author": [ + { + "type": [ + "h-card" + ], + "properties": { + "name": [ + "manton" + ], + "url": [ + "http://www.manton.org/author/manton" + ] + }, + "value": "manton" + } + ] + }, + "id": "post-6586" + } + ], + "rels": { + "category": [ + "http://www.manton.org/category/technology" + ], + "tag": [ + "http://www.manton.org/category/technology", + "http://www.manton.org/tag/domains", + "http://www.manton.org/tag/indiewebcamp", + "http://www.manton.org/tag/microblog", + "http://www.manton.org/tag/wordpress" + ], + "bookmark": [ + "http://www.manton.org/2018/03/indieweb-generation-4-and-hosted-domains.html" + ], + "author": [ + "http://www.manton.org/author/manton" + ] + }, + "rel-urls": { + "http://www.manton.org/category/technology": { + "text": "Technology", + "rels": [ + "category", + "tag" + ] + }, + "http://www.manton.org/tag/domains": { + "text": "domains", + "rels": [ + "tag" + ] + }, + "http://www.manton.org/tag/indiewebcamp": { + "text": "indiewebcamp", + "rels": [ + "tag" + ] + }, + "http://www.manton.org/tag/microblog": { + "text": "microblog", + "rels": [ + "tag" + ] + }, + "http://www.manton.org/tag/wordpress": { + "text": "wordpress", + "rels": [ + "tag" + ] + }, + "http://www.manton.org/2018/03/indieweb-generation-4-and-hosted-domains.html": { + "title": "9:24 am", + "text": "2018/03/23", + "rels": [ + "bookmark" + ] + }, + "http://www.manton.org/author/manton": { + "title": "View all posts by manton", + "text": "manton", + "rels": [ + "author" + ] + } + } +} \ No newline at end of file diff --git a/tests/cases/third-party/phpmf2/classic/hentry-tag2.html b/tests/cases/third-party/phpmf2/classic/hentry-tag2.html new file mode 100644 index 0000000..5952145 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hentry-tag2.html @@ -0,0 +1,5 @@ +
+
+ Entry content should not include the generated data element for rel tag backcompat +
+
\ No newline at end of file diff --git a/tests/cases/third-party/phpmf2/classic/hentry-tag2.json b/tests/cases/third-party/phpmf2/classic/hentry-tag2.json new file mode 100644 index 0000000..9e52242 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hentry-tag2.json @@ -0,0 +1,33 @@ +{ + "items": [ + { + "type": [ + "h-entry" + ], + "properties": { + "category": [ + "test" + ], + "content": [ + { + "html": "Entry content should not include the generated data element for rel tag backcompat test", + "value": "Entry content should not include the generated data element for rel tag backcompat test" + } + ] + } + } + ], + "rels": { + "tag": [ + "http://example.com/tag/test" + ] + }, + "rel-urls": { + "http://example.com/tag/test": { + "text": "test", + "rels": [ + "tag" + ] + } + } +} \ No newline at end of file diff --git a/tests/cases/third-party/phpmf2/classic/hreview-tag.html b/tests/cases/third-party/phpmf2/classic/hreview-tag.html new file mode 100644 index 0000000..5f84718 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hreview-tag.html @@ -0,0 +1,32 @@ + +
+ +

+ Divine Brunch! +

+ + Feb 19, 2006 + + by Joan Gelfand + +
+ + Garçon + +
+
1101 Valencia Street
+ San Francisco, + CA +
+
+

+ ★★★ + Best Benedicts! + Two perfectly poached eggs and a thin slice of tasty, French ham rest on a circle of warm brioche. Drizzled on top is a light, slightly tangy sauce. Seamless! The sophisticated room and great wine list added to the whole experience - Super!

+

Technorati Tags: ,

+
\ No newline at end of file diff --git a/tests/cases/third-party/phpmf2/classic/hreview-tag.json b/tests/cases/third-party/phpmf2/classic/hreview-tag.json new file mode 100644 index 0000000..e29ba69 --- /dev/null +++ b/tests/cases/third-party/phpmf2/classic/hreview-tag.json @@ -0,0 +1,103 @@ +{ + "items": [ + { + "type": [ + "h-review" + ], + "properties": { + "name": [ + "Divine Brunch!", + { + "value": "Joan Gelfand", + "type": [ + "h-card" + ], + "properties": { + } + } + ], + "reviewed": [ + "20060219T1919-0800" + ], + "author": [ + { + "value": "Joan Gelfand", + "type": [ + "h-card" + ], + "properties": { + } + } + ], + "item": [ + { + "value": "Garçon", + "type": [ + "h-card" + ], + "properties": { + "url": [ + "http://www.garconsf.com" + ], + "name": [ + "Garçon" + ], + "adr": [ + { + "value": "1101 Valencia Street San Francisco, CA", + "type": [ + "h-adr" + ], + "properties": { + "street-address": [ + "1101 Valencia Street" + ], + "locality": [ + "San Francisco" + ], + "region": [ + "CA" + ] + } + } + ] + } + } + ], + "content": [ + { + "value": "★★★ Best Benedicts! Two perfectly poached eggs and a thin slice of tasty, French ham rest on a circle of warm brioche. Drizzled on top is a light, slightly tangy sauce. Seamless! The sophisticated room and great wine list added to the whole experience - Super!", + "html": "

\n ★★★ \n Best Benedicts!\n Two perfectly poached eggs and a thin slice of tasty, French ham rest on a circle of warm brioche. Drizzled on top is a light, slightly tangy sauce. Seamless! The sophisticated room and great wine list added to the whole experience - Super!

" + } + ], + "rating": [ + "3" + ], + "category": [ + "Garcon", + "Garçon" + ] + } + } + ], + "rels": { + "tag": [ + "http://www.technorati.com/tag/Garcon", + "http://www.technorati.com/tag/Garçon" + ] + }, + "rel-urls": { + "http://www.technorati.com/tag/Garcon": { + "text": "Garcon", + "rels": [ + "tag" + ] + }, + "http://www.technorati.com/tag/Garçon": { + "text": "Garçon", + "rels": [ + "tag" + ] + } + } +} \ No newline at end of file