Browse Source

Feed links

master
J. King 4 years ago
parent
commit
0ebbd6b7cf
  1. 12
      lib/Parser/XML/Construct.php
  2. 10
      lib/Parser/XML/Feed.php
  3. 98
      tests/cases/XML/feed-atom.yaml
  4. 14
      tests/cases/XML/feed-rss0.yaml
  5. 16
      tests/cases/XML/feed-rss1.yaml
  6. 68
      tests/cases/XML/feed-rss2.yaml

12
lib/Parser/XML/Construct.php

@ -213,4 +213,16 @@ abstract class Construct {
/**
 * Looks up the feed language from a "language" element.
 *
 * The lookup is delegated to fetchString() with ".+" as its second
 * argument (presumably a pattern demanding a non-empty value; confirm
 * against fetchString()).
 *
 * @return string|null Whatever fetchString() produced
 */
protected function getLangRss2(): ?string {
    $language = $this->fetchString("language", ".+");
    return $language;
}
/**
 * Looks up the Atom link with the "alternate" relation.
 *
 * The two media types handed to fetchAtomRelation() are HTML and XHTML,
 * listed in that order.
 *
 * @return Url|null Whatever fetchAtomRelation() produced
 */
protected function getLinkAtom(): ?Url {
    $mediaTypes = ["text/html", "application/xhtml+xml"];
    return $this->fetchAtomRelation("alternate", $mediaTypes);
}
/**
 * Looks up the link of an RSS 2.0-style construct.
 *
 * An explicit "link" element is tried first. Failing that, a "guid"
 * element is used, but only when it may be treated as a permalink: either
 * it carries no permalink attribute at all, or that attribute is exactly
 * "true".
 *
 * The RSS 2.0 specification spells the attribute "isPermaLink", and XML
 * attribute names are case-sensitive, so the specified spelling is tested
 * alongside the "isPermalink" spelling which was previously the only one
 * recognized. Without this, a guid marked isPermaLink="false" would be
 * mistaken for a link.
 *
 * @return Url|null The result of the first fetchUrl() query which is not null, or null
 */
protected function getLinkRss2(): ?Url {
    return $this->fetchUrl("link")
        ?? $this->fetchUrl("guid[not(@isPermaLink or @isPermalink) or @isPermaLink='true' or @isPermalink='true']");
}
/**
 * Looks up the link from a "link" element under either the "rss1" or the
 * "rss0" prefix.
 *
 * @return Url|null Whatever fetchUrl() produced for the combined query
 */
protected function getLinkRss1(): ?Url {
    $query = "rss1:link|rss0:link";
    return $this->fetchUrl($query);
}
}

10
lib/Parser/XML/Feed.php

@ -85,7 +85,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
$feed->id = $this->getId();
$feed->lang = $this->getLang();
$feed->url = $this->getUrl();
//$feed->link = $this->getLink();
$feed->link = $this->getLink();
//$feed->title = $this->getTitle();
//$feed->summary = $this->getSummary();
//$feed->dateModified = $this->getDateModified();
@ -123,14 +123,14 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
return $this->getUrlAtom() ?? $this->getUrlRss1() ?? $this->getUrlPod();
}
/**
 * Returns the feed title from the first source which supplies one.
 *
 * Sources are consulted in this order: Atom, RSS 1, RSS 2, DC, Pod. Later
 * sources are only consulted when all earlier ones returned null.
 */
public function getTitle(): ?Text {
    $title = $this->getTitleAtom();
    if ($title === null) {
        $title = $this->getTitleRss1();
    }
    if ($title === null) {
        $title = $this->getTitleRss2();
    }
    if ($title === null) {
        $title = $this->getTitleDC();
    }
    if ($title === null) {
        $title = $this->getTitlePod();
    }
    return $title;
}
/**
 * Returns the feed link from the first source which supplies one.
 *
 * Sources are consulted in this order: Atom, RSS 1, RSS 2. Later sources
 * are only consulted when all earlier ones returned null.
 */
public function getLink(): ?Url {
    $atom = $this->getLinkAtom();
    if ($atom !== null) {
        return $atom;
    }
    $rss1 = $this->getLinkRss1();
    if ($rss1 !== null) {
        return $rss1;
    }
    return $this->getLinkRss2();
}
/**
 * Returns the feed title from the first source which supplies one.
 *
 * Sources are consulted in this order: Atom, RSS 1, RSS 2, DC, Pod. Later
 * sources are only consulted when all earlier ones returned null.
 */
public function getTitle(): ?Text {
    $title = $this->getTitleAtom();
    if ($title === null) {
        $title = $this->getTitleRss1();
    }
    if ($title === null) {
        $title = $this->getTitleRss2();
    }
    if ($title === null) {
        $title = $this->getTitleDC();
    }
    if ($title === null) {
        $title = $this->getTitlePod();
    }
    return $title;
}
public function getSummary(): ?Text {
// unlike most other data, Atom is not preferred, because Atom doesn't really have feed summaries
return $this->getSummaryDC() ?? $this->getSummaryRss1() ?? $this->getSummaryRss2() ?? $this->getSummaryPod() ?? $this->getSummaryAtom();

98
tests/cases/XML/feed-atom.yaml

@ -65,3 +65,101 @@ Canonical URL:
format: atom
version: '1.0'
url: 'http://example.com/'
Feed link 1:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="alternate" href="http://example.com/"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 2: # default relation is "alternate"
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="" href="http://example.com/"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 3: # default relation is "alternate"
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.com/"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 4: # other relations are ignored
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="bogus" href="http://example.net/"/>
<link href="http://example.com/"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 5: # XHTML is preferred
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.net/"/>
<link href="http://example.com/" type="application/xhtml+xml; charset=utf-8"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 6: # HTML is even more preferred
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.net/"/>
<link href="http://example.org/" type="application/xhtml+xml; charset=utf-8"/>
<link href="http://example.com/" type="TEXT/HTML; charset=utf-8"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 7: # No type is better than an unacceptable type
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.net/" type="image/png"/>
<link href="http://example.org/" type="application/xml"/>
<link href="http://example.com/"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 8: # Bad URLs are ignored
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.com/" type="application/xhtml+xml; charset=utf-8"/>
<link href="http://[example.org]/" type="text/html; charset=utf-8"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'
Feed link 9: # The first matching relation wins
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.com/" type="text/html"/>
<link href="http://example.org/" type="text/html"/>
</feed>
output:
format: atom
version: '1.0'
link: 'http://example.com/'

14
tests/cases/XML/feed-rss0.yaml

@ -54,3 +54,17 @@ Canonical URL: # this is not actually a feature of RSS 0.90, but is consistent w
format: rdf
version: '0.90'
url: 'http://example.com/'
Feed link:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<channel>
<link/>
<link>http://[example.net]/</link>
<link>http://example.com/</link>
</channel>
</rdf:RDF>
output:
format: rdf
version: '0.90'
link: 'http://example.com/'

16
tests/cases/XML/feed-rss1.yaml

@ -182,10 +182,24 @@ DC language:
Canonical URL:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel rdf:about="http://example.com/"/>
</rdf:RDF>
output:
format: rdf
version: '1.0'
url: 'http://example.com/'
Feed link:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<link/>
<link>http://[example.net]/</link>
<link>http://example.com/</link>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
link: 'http://example.com/'

68
tests/cases/XML/feed-rss2.yaml

@ -9,7 +9,7 @@ Channel GUID:
input: >
<rss>
<channel>
<guid>http://example.com/</guid>
<guid isPermalink="false">http://example.com/</guid>
</channel>
</rss>
output:
@ -20,7 +20,7 @@ Channel GUID with whitespace:
input: >
<rss>
<channel>
<guid>
<guid isPermalink="false">
http://example.com/
</guid>
</channel>
@ -33,7 +33,7 @@ Root GUID: # Any elements on the RSS2 root element should be ignored
input: >
<rss>
<channel/>
<guid>http://example.com/</guid>
<guid isPermalink="false">http://example.com/</guid>
</rss>
output:
format: rss
@ -43,7 +43,7 @@ Bogus GUID before good:
<rss>
<channel>
<guid/>
<guid>http://example.com/</guid>
<guid isPermalink="false">http://example.com/</guid>
</channel>
</rss>
output:
@ -230,3 +230,63 @@ Feed language:
output:
format: rss
lang: ja
Feed link:
input: >
<rss><channel>
<link/>
<link>http://[example.net]/</link>
<link>http://example.com/</link>
</channel></rss>
output:
format: rss
link: 'http://example.com/'
Feed link via GUID 1:
input: >
<rss><channel>
<guid>http://example.com/</guid>
</channel></rss>
output:
format: rss
id: 'http://example.com/'
link: 'http://example.com/'
Feed link via GUID 2:
input: >
<rss><channel>
<guid isPermalink='true'>http://example.com/</guid>
</channel></rss>
output:
format: rss
id: 'http://example.com/'
link: 'http://example.com/'
GUID not a link:
input: >
<rss><channel>
<guid isPermalink='maybe not'>http://example.com/</guid>
</channel></rss>
output:
format: rss
id: 'http://example.com/'
Explicit link preferred:
input: >
<rss><channel>
<guid>http://example.net/</guid>
<link>http://example.com/</link>
</channel></rss>
output:
format: rss
id: 'http://example.net/'
link: 'http://example.com/'
GUID not a url:
input: >
<rss><channel>
<guid>http://[example.com]/</guid>
</channel></rss>
output:
format: rss
id: 'http://[example.com]/'

Loading…
Cancel
Save