diff --git a/lib/Parser/XML/Construct.php b/lib/Parser/XML/Construct.php index 25cde4a..9b37bad 100644 --- a/lib/Parser/XML/Construct.php +++ b/lib/Parser/XML/Construct.php @@ -6,6 +6,8 @@ declare(strict_types=1); namespace MensBeam\Lax\Parser\XML; +use MensBeam\Lax\Category\Category; +use MensBeam\Lax\Category\Collection as CategoryCollection; use MensBeam\Lax\Person\Person; use MensBeam\Lax\Person\Collection as PersonCollection; use MensBeam\Lax\Text; @@ -77,7 +79,7 @@ abstract class Construct { * @param \DOMNode $context The context node for the XPath query * @return \MensBeam\Lax\Date|array|null */ - protected function fetchDate(string $query, int $mode = self::DATE_ANY, \DOMNode $context = null) { + protected function fetchDate(string $query, int $mode, \DOMNode $context = null) { $out = []; $tz = new \DateTimeZone("UTC"); assert(in_array($mode, [self::DATE_ANY, self::DATE_ALL, self::DATE_EARLIEST, self::DATE_LATEST])); @@ -327,5 +329,56 @@ abstract class Construct { protected function getTitlePod(): ?Text { return $this->fetchText("apple:title", self::TEXT_PLAIN); } - + + protected function getCategoriesAtom(): ?CategoryCollection { + $out = new CategoryCollection; + foreach ($this->xpath->query("atom:category[@term]") as $node) { + $c = new Category; + $c->domain = $this->trimText($node->getAttribute("scheme")); + $c->label = $this->trimText($node->getAttribute("label")); + $c->name = $this->trimText($node->getAttribute("term")); + if (strlen($c->name)) { + $out[] = $c; + } + } + return count($out) ? $out : null; + } + + protected function getCategoriesRss2(): ?CategoryCollection { + $out = new CategoryCollection; + foreach ($this->xpath->query("category") as $node) { + $c = new Category; + $c->domain = $this->trimText($node->getAttribute("domain")); + $c->name = $this->trimText($node->textContent); + if (strlen($c->name)) { + $out[] = $c; + } + } + return count($out) ? $out : null; + } + + /** Dublin Core doesn't have an obvious category type, so we use 'subject' as a nearest approximation */ + protected function getCategoriesDC(): ?CategoryCollection { + $out = new CategoryCollection; + foreach ($this->fetchString("dc:subject", null, true) ?? [] as $text) { + if (strlen($text)) { + $c = new Category; + $c->name = $text; + $out[] = $c; + } + } + return count($out) ? $out : null; + } + + protected function getCategoriesPod(): ?CategoryCollection { + $out = new CategoryCollection; + foreach ($this->xpath->query("apple:category|gplay:category") ?? [] as $node) { + $c = new Category; + $c->name = $this->trimText($node->getAttribute("text")); + if (strlen($c->name)) { + $out[] = $c; + } + } + return count($out) ? $out : null; + } } diff --git a/lib/Parser/XML/Feed.php b/lib/Parser/XML/Feed.php index d0a1c55..7ef673a 100644 --- a/lib/Parser/XML/Feed.php +++ b/lib/Parser/XML/Feed.php @@ -92,7 +92,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { $feed->icon = $this->getIcon(); $feed->image = $this->getImage(); //$feed->people = $this->getPeople(); - //$feed->categories = $this->getCategories(); + $feed->categories = $this->getCategories(); //$feed->entries = $this->getEntries($feed); return $feed; } @@ -105,14 +105,14 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { public function getSchedule(): Schedule { $sched = new Schedule; - $sched->interval = $this->getSchedIntervalRss1() ?? $this->getSchedIntervalRss2(); + $sched->interval = $this->getSchedIntervalRss1() ?? $this->getSchedIntervalRss2(); $sched->skip = $this->getSchedSkipRss2(); $sched->expired = $this->getExpiredPod(); if (is_null($sched->expired) && (($sched->skip & Schedule::DAY_ALL) == Schedule::DAY_ALL || ($sched->skip & Schedule::HOUR_ALL) == Schedule::HOUR_ALL)) { $sched->expired = true; } if ($sched->interval) { - $sched->base = $this->getSchedBaseRss1(); + $sched->base = $this->fetchDate("sched:updateBase", self::DATE_ANY); } return $sched; } @@ -178,7 +178,11 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { } public function getCategories(): CategoryCollection { - return $this->getCategoriesAtom() ?? $this->getCategoriesRss2() ?? $this->getCategoriesDC() ?? $this->getCategoriesPod() ?? new CategoryCollection; + return $this->getCategoriesAtom() + ?? $this->getCategoriesRss2() + ?? $this->getCategoriesPod() + ?? $this->getCategoriesDC() + ?? new CategoryCollection; } public function getPeople(): PersonCollection { @@ -233,11 +237,6 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { return null; } - protected function getSchedBaseRss1(): ?Date { - return $this->fetchDate("sched:updateBase"); - } - - /** Computes the "skip-schedule" of an RSS feed, the set of days and hours during which a feed should not be fetched */ protected function getSchedSkipRss2(): ?int { $out = 0; diff --git a/lib/Parser/XML/OldFeed.php b/lib/Parser/XML/OldFeed.php deleted file mode 100644 index da8c387..0000000 --- a/lib/Parser/XML/OldFeed.php +++ /dev/null @@ -1,74 +0,0 @@ -fetchStringAtom("atom:subtitle"); - } - - /** Primitive to fetch an RSS feed summary */ - protected function getSummaryRss2(): ?Text { - return $this->fetchString("description"); - } - - /** Primitive to fetch an RDF feed summary */ - protected function getSummaryRss1(): ?Text { - return $this->fetchString("rss1:description|rss0:description"); - } - - /** Primitive to fetch a Dublin Core feed summary */ - protected function getSummaryDC(): ?Text { - return $this->fetchString("dc:description"); - } - - /** Primitive to fetch a podcast summary */ - protected function getSummaryPod(): ?Text { - return $this->fetchString("apple:summary|gplay:description") ?? $this->fetchString("apple:subtitle"); - } - - /** Primitive to fetch a collection of authors associated with an Atom feed */ - protected function getAuthorsAtom(): ?PersonCollection { - return $this->fetchPeopleAtom("atom:author", "author"); - } - - /** Primitive to fetch an RDF feed's canonical URL */ - protected function getUrlRss1(): ?Url { - // XPath doesn't seem to like the query we'd need for this, so it must be done the hard way. - $node = $this->subject; - if ($node->hasAttributeNS(XPath::NS['rdf'], "about")) { - if ( - ($node->localName === "channel" && ($node->namespaceURI === XPath::NS['rss1'] || $node->namespaceURI === XPath::NS['rss0'])) || - ($node === $node->ownerDocument->documentElement && $node->localName === "RDF" && $node->namespaceURI === XPath::NS['rdf']) - ) { - return $this->resolveNodeUrl($node, "about", XPath::NS['rdf']); - } - } - return null; - } - - /** Primitive to fetch a podcast's canonical URL */ - protected function getUrlPod(): ?Url { - return $this->fetchUrl("apple:new-feed-url"); - } - - /** Primitive to fetch the modification date of an RSS feed */ - protected function getDateModifiedRss2(): ?Date { - return $this->fetchDate("lastBuildDate") ?? $this->fetchDate("pubDate"); - } -} diff --git a/tests/cases/AbstractParserTestCase.php b/tests/cases/AbstractParserTestCase.php index 264182d..7d72ff1 100644 --- a/tests/cases/AbstractParserTestCase.php +++ b/tests/cases/AbstractParserTestCase.php @@ -84,6 +84,16 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase { $c[] = $this->makePerson($m); } $f->$k = $c; + } elseif ($k === "categories") { + $c = new CategoryCollection; + foreach ($v as $m) { + $o = new Category; + foreach ($m as $kk => $vv) { + $o->$kk = $vv; + } + $c[] = $o; + } + $f->$k = $c; } elseif ($k === "entries") { $c = []; foreach ($v as $m) { diff --git a/tests/cases/XML/feed-atom.yaml b/tests/cases/XML/feed-atom.yaml index 878414f..24ab609 100644 --- a/tests/cases/XML/feed-atom.yaml +++ b/tests/cases/XML/feed-atom.yaml @@ -445,3 +445,22 @@ Logo URL: format: atom version: '1.0' image: 'http://example.com/' + +Categories: + input: > + + + + + + + output: + format: atom + version: '1.0' + categories: + - name: ook + - name: eek + label: 'Eek!' + - name: ack + label: 'Ack!' + domain: '4:3' # Not treated as a URI