Browse Source

Feed summaries

master
J. King 4 years ago
parent
commit
c2c7452764
  1. 14
      lib/Parser/XML/Construct.php
  2. 25
      lib/Parser/XML/Feed.php
  3. 25
      tests/cases/XML/feed-atom.yaml
  4. 24
      tests/cases/XML/feed-other.yaml
  5. 13
      tests/cases/XML/feed-rss0.yaml
  6. 26
      tests/cases/XML/feed-rss1.yaml
  7. 10
      tests/cases/XML/feed-rss2.yaml

14
lib/Parser/XML/Construct.php

@ -15,6 +15,10 @@ use MensBeam\Lax\Url;
abstract class Construct {
use \MensBeam\Lax\Parser\Construct;
protected const TEXT_LOOSE = "loose";
protected const TEXT_PLAIN = "plain";
protected const TEXT_HTML = "html";
/** @var \DOMDocument */
protected $document;
/** @var \DOMXPath */
@ -110,8 +114,6 @@ abstract class Construct {
$data = $this->trimText($data);
} elseif ($format === "html" || $format === "loose") {
$out->htmlBase = strlen($node->baseURI) ? $node->baseURI : null;
} elseif ($format === "xhtml") { // @codeCoverageIgnore
$out->xhtmlBase = strlen($node->baseURI) ? $node->baseURI : null; // @codeCoverageIgnore
}
$out->$format = $data;
return $out;
@ -289,19 +291,19 @@ abstract class Construct {
}
protected function getTitleRss1(): ?Text {
return $this->fetchText("rss1:title|rss0:title", "loose");
return $this->fetchText("rss1:title|rss0:title", self::TEXT_LOOSE);
}
protected function getTitleRss2(): ?Text {
return $this->fetchText("title", "loose");
return $this->fetchText("title", self::TEXT_LOOSE);
}
protected function getTitleDC(): ?Text {
return $this->fetchText("dc:title", "plain");
return $this->fetchText("dc:title", self::TEXT_PLAIN);
}
protected function getTitlePod(): ?Text {
return $this->fetchText("apple:title", "plain");
return $this->fetchText("apple:title", self::TEXT_PLAIN);
}
}

25
lib/Parser/XML/Feed.php

@ -87,7 +87,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
$feed->url = $this->getUrl();
$feed->link = $this->getLink();
$feed->title = $this->getTitle();
//$feed->summary = $this->getSummary();
$feed->summary = $this->getSummary();
//$feed->dateModified = $this->getDateModified();
//$feed->icon = $this->getIcon();
//$feed->image = $this->getImage();
@ -132,8 +132,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
}
public function getSummary(): ?Text {
// unlike most other data, Atom is not preferred, because Atom doesn't really have feed summaries
return $this->getSummaryDC() ?? $this->getSummaryRss1() ?? $this->getSummaryRss2() ?? $this->getSummaryPod() ?? $this->getSummaryAtom();
return $this->getSummaryAtom() ?? $this->getSummaryDC() ?? $this->getSummaryRss1() ?? $this->getSummaryRss2() ?? $this->getSummaryPod();
}
public function getCategories(): CategoryCollection {
@ -266,4 +265,24 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
protected function getUrlPod(): ?Url {
return $this->fetchUrl("apple:new-feed-url");
}
/**
 * Looks up the feed summary among Atom elements.
 *
 * "atom:summary" is queried first; "atom:subtitle" is only consulted when
 * that first lookup evaluates to null.
 *
 * @return Text|null The first non-null result of the two lookups, or null when both are null
 */
protected function getSummaryAtom(): ?Text {
return $this->fetchAtomText("atom:summary") ?? $this->fetchAtomText("atom:subtitle");
}
/**
 * Looks up the feed summary for RSS 2.0 by querying the unprefixed
 * "description" element.
 *
 * The text is requested in the TEXT_LOOSE format.
 * NOTE(review): how "loose" text is interpreted is decided by fetchText(), which is not visible here.
 *
 * @return Text|null Whatever fetchText() yields for the query
 */
protected function getSummaryRss2(): ?Text {
return $this->fetchText("description", self::TEXT_LOOSE);
}
/**
 * Looks up the feed summary for RSS 1.0 and RSS 0.9x feeds.
 *
 * A single union query covers both "rss1:description" and "rss0:description",
 * and the text is requested in the TEXT_LOOSE format.
 * NOTE(review): the "rss1" and "rss0" prefixes are presumably bound to their
 * namespaces elsewhere in the parser -- confirm against the XPath setup.
 *
 * @return Text|null Whatever fetchText() yields for the query
 */
protected function getSummaryRss1(): ?Text {
return $this->fetchText("rss1:description|rss0:description", self::TEXT_LOOSE);
}
/**
 * Looks up the feed summary from the "dc:description" element.
 *
 * The text is requested in the TEXT_PLAIN format.
 * NOTE(review): the "dc" prefix presumably maps to the Dublin Core elements
 * namespace; the binding is not visible here.
 *
 * @return Text|null Whatever fetchText() yields for the query
 */
protected function getSummaryDC(): ?Text {
return $this->fetchText("dc:description", self::TEXT_PLAIN);
}
/**
 * Looks up the feed summary among podcast extension elements.
 *
 * A union query for "apple:summary" and "gplay:description" is tried first;
 * "apple:subtitle" is only consulted when that first lookup evaluates to null.
 * Both lookups request the TEXT_PLAIN format.
 * NOTE(review): the "apple" and "gplay" prefixes are presumably bound to the
 * iTunes and Google Play podcast namespaces elsewhere -- confirm.
 *
 * @return Text|null The first non-null result of the two lookups, or null when both are null
 */
protected function getSummaryPod(): ?Text {
return $this->fetchText("apple:summary|gplay:description", self::TEXT_PLAIN) ?? $this->fetchText("apple:subtitle", self::TEXT_PLAIN);
}
}

25
tests/cases/XML/feed-atom.yaml

@ -28,7 +28,6 @@ Atom ID with whitespace:
id: 'http://example.com/'
Bogus ID before good:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
@ -310,6 +309,30 @@ Feed title in multiple formats:
xhtml: '<div xmlns="http://www.w3.org/1999/xhtml"> There &amp; Then </div>'
xhtmlBase: 'http://example.com/'
Feed summary 1:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<subtitle type="html">Subtitle text</subtitle>
<summary type="html">Summary text</summary>
</feed>
output:
format: atom
version: '1.0'
summary:
html: 'Summary text'
Feed summary 2:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<subtitle type="html">Subtitle text</subtitle>
</feed>
output:
format: atom
version: '1.0'
summary:
html: 'Subtitle text'
Ignored text constructs:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">

24
tests/cases/XML/feed-other.yaml

@ -59,3 +59,27 @@ iPod title:
format: rss
title:
plain: 'Plain text'
iPod summary: # Apple's own documentation doesn't mention a namespaced summary element, but Google's does
input: >
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<itunes:summary> Plain text</itunes:summary>
</channel>
</rss>
output:
format: rss
summary:
plain: 'Plain text'
Google Play summary:
input: >
<rss xmlns:play="http://www.google.com/schemas/play-podcasts/1.0">
<channel>
<play:description> Plain text</play:description>
</channel>
</rss>
output:
format: rss
summary:
plain: 'Plain text'

13
tests/cases/XML/feed-rss0.yaml

@ -95,3 +95,16 @@ Feed title 2:
title:
loose: 'Loose text'
htmlBase: 'https://example.com/'
Feed summary:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<channel>
<description>Loose text</description>
</channel>
</rdf:RDF>
output:
format: rdf
version: '0.90'
summary:
loose: 'Loose text'

26
tests/cases/XML/feed-rss1.yaml

@ -243,3 +243,29 @@ DC title:
version: '1.0'
title:
plain: 'Plain text'
Feed summary:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<description>Loose text</description>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
summary:
loose: 'Loose text'
DC summary:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<dc:description>Plain text</dc:description>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
summary:
plain: 'Plain text'

10
tests/cases/XML/feed-rss2.yaml

@ -320,3 +320,13 @@ Feed title 2:
title:
loose: 'Loose text'
htmlBase: 'https://example.com/'
Feed summary:
input: >
<rss><channel>
<description>Loose text</description>
</channel></rss>
output:
format: rss
summary:
loose: 'Loose text'

Loading…
Cancel
Save