Browse Source

Initial category implementation

master
J. King 4 years ago
parent
commit
7902a27366
  1. 57
      lib/Parser/XML/Construct.php
  2. 17
      lib/Parser/XML/Feed.php
  3. 74
      lib/Parser/XML/OldFeed.php
  4. 10
      tests/cases/AbstractParserTestCase.php
  5. 19
      tests/cases/XML/feed-atom.yaml

57
lib/Parser/XML/Construct.php

@ -6,6 +6,8 @@
declare(strict_types=1);
namespace MensBeam\Lax\Parser\XML;
use MensBeam\Lax\Category\Category;
use MensBeam\Lax\Category\Collection as CategoryCollection;
use MensBeam\Lax\Person\Person;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Text;
@ -77,7 +79,7 @@ abstract class Construct {
* @param \DOMNode $context The context node for the XPath query
* @return \MensBeam\Lax\Date|array|null
*/
protected function fetchDate(string $query, int $mode = self::DATE_ANY, \DOMNode $context = null) {
protected function fetchDate(string $query, int $mode, \DOMNode $context = null) {
$out = [];
$tz = new \DateTimeZone("UTC");
assert(in_array($mode, [self::DATE_ANY, self::DATE_ALL, self::DATE_EARLIEST, self::DATE_LATEST]));
@ -327,5 +329,56 @@ abstract class Construct {
/** Primitive to fetch a podcast's title
 *
 * The 'apple:title' element is queried and requested in plain-text mode
 *
 * @return Text|null Whatever fetchText() yields for the query
 */
protected function getTitlePod(): ?Text {
    $title = $this->fetchText("apple:title", self::TEXT_PLAIN);
    return $title;
}
/** Primitive to fetch the categories expressed as Atom 'category' elements
 *
 * Only elements carrying a 'term' attribute are queried. The 'scheme',
 * 'label', and 'term' attributes supply the category's domain, label, and
 * name respectively, each passed through trimText(). Categories whose
 * resulting name is an empty string are discarded
 *
 * @return CategoryCollection|null The categories found, or null if there were none
 */
protected function getCategoriesAtom(): ?CategoryCollection {
    $categories = new CategoryCollection;
    $nodes = $this->xpath->query("atom:category[@term]");
    foreach ($nodes as $element) {
        $category = new Category;
        $category->domain = $this->trimText($element->getAttribute("scheme"));
        $category->label = $this->trimText($element->getAttribute("label"));
        $category->name = $this->trimText($element->getAttribute("term"));
        if (!strlen($category->name)) {
            // The attribute was present but effectively blank; skip it
            continue;
        }
        $categories[] = $category;
    }
    if (count($categories)) {
        return $categories;
    }
    return null;
}
/** Primitive to fetch the categories expressed as RSS 2.0 'category' elements
 *
 * The element's text content supplies the category name and its 'domain'
 * attribute supplies the domain, each passed through trimText(). Elements
 * whose resulting name is an empty string are discarded
 *
 * @return CategoryCollection|null The categories found, or null if there were none
 */
protected function getCategoriesRss2(): ?CategoryCollection {
    $categories = new CategoryCollection;
    $nodes = $this->xpath->query("category");
    foreach ($nodes as $element) {
        $category = new Category;
        $category->domain = $this->trimText($element->getAttribute("domain"));
        $category->name = $this->trimText($element->textContent);
        if (!strlen($category->name)) {
            // Nothing usable as a name; skip this element
            continue;
        }
        $categories[] = $category;
    }
    if (count($categories)) {
        return $categories;
    }
    return null;
}
/** Primitive to fetch categories from Dublin Core metadata
 *
 * Dublin Core has no obvious category type, so 'subject' is used as the
 * nearest approximation. Each non-empty string yielded by the 'dc:subject'
 * query becomes the name of a category
 *
 * @return CategoryCollection|null The categories found, or null if there were none
 */
protected function getCategoriesDC(): ?CategoryCollection {
    $categories = new CategoryCollection;
    $subjects = $this->fetchString("dc:subject", null, true);
    if ($subjects === null) {
        // No matches at all: iterate over nothing
        $subjects = [];
    }
    foreach ($subjects as $subject) {
        if (!strlen($subject)) {
            continue;
        }
        $category = new Category;
        $category->name = $subject;
        $categories[] = $category;
    }
    if (count($categories)) {
        return $categories;
    }
    return null;
}
/** Primitive to fetch categories from podcast extension elements
 *
 * Both 'apple:category' and 'gplay:category' elements are queried; the
 * 'text' attribute of each, passed through trimText(), supplies the
 * category name. Elements whose resulting name is an empty string are
 * discarded
 *
 * @return CategoryCollection|null The categories found, or null if there were none
 */
protected function getCategoriesPod(): ?CategoryCollection {
    $categories = new CategoryCollection;
    $nodes = $this->xpath->query("apple:category|gplay:category") ?? [];
    foreach ($nodes as $element) {
        $category = new Category;
        $category->name = $this->trimText($element->getAttribute("text"));
        if (!strlen($category->name)) {
            // Nothing usable as a name; skip this element
            continue;
        }
        $categories[] = $category;
    }
    if (count($categories)) {
        return $categories;
    }
    return null;
}
}

17
lib/Parser/XML/Feed.php

@ -92,7 +92,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
$feed->icon = $this->getIcon();
$feed->image = $this->getImage();
//$feed->people = $this->getPeople();
//$feed->categories = $this->getCategories();
$feed->categories = $this->getCategories();
//$feed->entries = $this->getEntries($feed);
return $feed;
}
@ -105,14 +105,14 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
/** Assembles the publishing schedule of the feed
 *
 * The update interval is taken from the RSS 1.0 primitive, falling back to
 * the RSS 2.0 one; the skip-schedule comes from RSS 2.0 and the expiry flag
 * from the podcast primitive. The base date is only looked up when an
 * interval was found
 *
 * @return Schedule The populated schedule object
 */
public function getSchedule(): Schedule {
    $sched = new Schedule;
    $sched->interval = $this->getSchedIntervalRss1() ?? $this->getSchedIntervalRss2();
    $sched->skip = $this->getSchedSkipRss2();
    $sched->expired = $this->getExpiredPod();
    if (is_null($sched->expired) && (($sched->skip & Schedule::DAY_ALL) == Schedule::DAY_ALL || ($sched->skip & Schedule::HOUR_ALL) == Schedule::HOUR_ALL)) {
        // No explicit expiry was reported, but the skip mask covers every day or every hour, so the feed is flagged as expired
        $sched->expired = true;
    }
    if ($sched->interval) {
        // The mode is passed explicitly rather than relying on a default
        $sched->base = $this->fetchDate("sched:updateBase", self::DATE_ANY);
    }
    return $sched;
}
@ -178,7 +178,11 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
}
/** Returns the categories associated with the feed
 *
 * The sources are consulted in order: Atom, RSS 2.0, podcast extensions,
 * and finally Dublin Core. The first source to yield any categories is
 * used. Dublin Core's 'subject' is only an approximation of a category,
 * so it is consulted last
 *
 * @return CategoryCollection The categories found; an empty collection if no source yielded any
 */
public function getCategories(): CategoryCollection {
    return $this->getCategoriesAtom()
        ?? $this->getCategoriesRss2()
        ?? $this->getCategoriesPod()
        ?? $this->getCategoriesDC()
        ?? new CategoryCollection;
}
public function getPeople(): PersonCollection {
@ -233,11 +237,6 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
return null;
}
/** Primitive to fetch the base date of an RSS 1.0 update schedule
 *
 * The 'sched:updateBase' element is queried. The date mode is passed
 * explicitly so the call does not depend on fetchDate() supplying a default
 *
 * @return Date|null Whatever fetchDate() yields for the query in DATE_ANY mode
 */
protected function getSchedBaseRss1(): ?Date {
    return $this->fetchDate("sched:updateBase", self::DATE_ANY);
}
/** Computes the "skip-schedule" of an RSS feed, the set of days and hours during which a feed should not be fetched */
protected function getSchedSkipRss2(): ?int {
$out = 0;

74
lib/Parser/XML/OldFeed.php

@ -1,74 +0,0 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\Parser\XML\Primitives;
use MensBeam\Lax\Parser\XML\XPath;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Date;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\Text;
use MensBeam\Lax\Url;
trait Feed {
    /** Primitive to fetch an Atom feed summary
     *
     * Atom has no 'description' element as the RSS formats do; its 'subtitle' element serves roughly the same purpose and is used instead
     */
    protected function getSummaryAtom(): ?Text {
        $summary = $this->fetchStringAtom("atom:subtitle");
        return $summary;
    }

    /** Primitive to fetch an RSS feed summary from its 'description' element */
    protected function getSummaryRss2(): ?Text {
        $summary = $this->fetchString("description");
        return $summary;
    }

    /** Primitive to fetch an RDF feed summary from either the RSS 1.0 or RSS 0.90 'description' element */
    protected function getSummaryRss1(): ?Text {
        $summary = $this->fetchString("rss1:description|rss0:description");
        return $summary;
    }

    /** Primitive to fetch a Dublin Core feed summary from 'dc:description' */
    protected function getSummaryDC(): ?Text {
        $summary = $this->fetchString("dc:description");
        return $summary;
    }

    /** Primitive to fetch a podcast summary
     *
     * The 'apple:summary' and 'gplay:description' elements are tried first; 'apple:subtitle' is only consulted when they yield null
     */
    protected function getSummaryPod(): ?Text {
        $summary = $this->fetchString("apple:summary|gplay:description");
        if ($summary !== null) {
            return $summary;
        }
        return $this->fetchString("apple:subtitle");
    }

    /** Primitive to fetch a collection of authors associated with an Atom feed */
    protected function getAuthorsAtom(): ?PersonCollection {
        $authors = $this->fetchPeopleAtom("atom:author", "author");
        return $authors;
    }

    /** Primitive to fetch an RDF feed's canonical URL
     *
     * The URL is read from the 'rdf:about' attribute of the subject node, but only when that node is either an RSS 1.0 or RSS 0.90 'channel' element, or the document's root 'rdf:RDF' element
     */
    protected function getUrlRss1(): ?Url {
        // XPath doesn't seem to cope with the query this would require, so the checks are performed by hand
        $subject = $this->subject;
        if (!$subject->hasAttributeNS(XPath::NS['rdf'], "about")) {
            return null;
        }
        $isChannel = $subject->localName === "channel"
            && ($subject->namespaceURI === XPath::NS['rss1'] || $subject->namespaceURI === XPath::NS['rss0']);
        if ($isChannel) {
            return $this->resolveNodeUrl($subject, "about", XPath::NS['rdf']);
        }
        // The root-element test is only evaluated when the channel test fails
        if ($subject === $subject->ownerDocument->documentElement && $subject->localName === "RDF" && $subject->namespaceURI === XPath::NS['rdf']) {
            return $this->resolveNodeUrl($subject, "about", XPath::NS['rdf']);
        }
        return null;
    }

    /** Primitive to fetch a podcast's canonical URL from 'apple:new-feed-url' */
    protected function getUrlPod(): ?Url {
        $url = $this->fetchUrl("apple:new-feed-url");
        return $url;
    }

    /** Primitive to fetch the modification date of an RSS feed
     *
     * 'lastBuildDate' is preferred; 'pubDate' is only consulted when the former yields null
     */
    protected function getDateModifiedRss2(): ?Date {
        $modified = $this->fetchDate("lastBuildDate");
        if ($modified !== null) {
            return $modified;
        }
        return $this->fetchDate("pubDate");
    }
}

10
tests/cases/AbstractParserTestCase.php

@ -84,6 +84,16 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase {
$c[] = $this->makePerson($m);
}
$f->$k = $c;
} elseif ($k === "categories") {
$c = new CategoryCollection;
foreach ($v as $m) {
$o = new Category;
foreach ($m as $kk => $vv) {
$o->$kk = $vv;
}
$c[] = $o;
}
$f->$k = $c;
} elseif ($k === "entries") {
$c = [];
foreach ($v as $m) {

19
tests/cases/XML/feed-atom.yaml

@ -445,3 +445,22 @@ Logo URL:
format: atom
version: '1.0'
image: 'http://example.com/'
Categories:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<category term="ook"/>
<category term="eek" label="Eek!"/>
<category term="ack" label="Ack!" scheme="4:3"/>
<category term="" label="Bogus"/>
</feed>
output:
format: atom
version: '1.0'
categories:
- name: ook
- name: eek
label: 'Eek!'
- name: ack
label: 'Ack!'
domain: '4:3' # Not treated as a URI

Loading…
Cancel
Save