Browse Source

Entry categories

master
J. King 4 years ago
parent
commit
bcea7631ec
  1. 28
      lib/Parser/XML/Construct.php
  2. 11
      lib/Parser/XML/Entry.php
  3. 7
      lib/Parser/XML/Feed.php
  4. 35
      lib/Parser/XML/OldEntry.php
  5. 34
      tests/cases/XML/entry-atom.yaml
  6. 28
      tests/cases/XML/entry-mixed.yaml
  7. 44
      tests/cases/XML/entry-other.yaml
  8. 54
      tests/cases/XML/entry-rss0.yaml
  9. 16
      tests/cases/XML/entry-rss1.yaml
  10. 17
      tests/cases/XML/entry-rss2.yaml

28
lib/Parser/XML/Construct.php

@ -389,9 +389,17 @@ abstract class Construct {
return $this->fetchText("apple:title", self::TEXT_PLAIN);
}
protected function getCategoriesAtom(): ?CategoryCollection {
/** Collects the categories found under a context node, trying each supported vocabulary in turn
 *
 * The vocabularies are consulted in order of preference: Atom, RSS 2.0, Google Play, iTunes, and finally Dublin Core. The first one to yield a result wins, and the remaining ones are not consulted
 *
 * @param \DOMNode $context The node relative to which the category elements are sought
 * @return CategoryCollection|null The categories of the first vocabulary which yields any, or null if none does
 */
protected function getCategoriesFromNode(\DOMNode $context): ?CategoryCollection {
    $found = $this->getCategoriesAtom($context);
    if ($found === null) {
        $found = $this->getCategoriesRss2($context);
    }
    if ($found === null) {
        $found = $this->getCategoriesGPlay($context);
    }
    if ($found === null) {
        $found = $this->getCategoriesTunes($context);
    }
    if ($found === null) {
        // Dublin Core subjects are the last resort
        $found = $this->getCategoriesDC($context);
    }
    return $found;
}
protected function getCategoriesAtom(\DOMNode $context): ?CategoryCollection {
$out = new CategoryCollection;
foreach ($this->xpath->query("atom:category[@term]") as $node) {
foreach ($this->xpath->query("atom:category[@term]", $context) as $node) {
$c = new Category;
$c->domain = $this->trimText($node->getAttribute("scheme"));
$c->label = $this->trimText($node->getAttribute("label"));
@ -403,9 +411,9 @@ abstract class Construct {
return count($out) ? $out : null;
}
protected function getCategoriesRss2(): ?CategoryCollection {
protected function getCategoriesRss2(\DOMNode $context): ?CategoryCollection {
$out = new CategoryCollection;
foreach ($this->xpath->query("rss2:category", $this->subject) as $node) {
foreach ($this->xpath->query("rss2:category", $context) as $node) {
$c = new Category;
$c->domain = $this->trimText($node->getAttribute("domain"));
$c->name = $this->trimText($node->textContent);
@ -417,9 +425,9 @@ abstract class Construct {
}
/** Dublin Core doesn't have an obvious category type, so we use 'subject' as a nearest approximation */
protected function getCategoriesDC(): ?CategoryCollection {
protected function getCategoriesDC(\DOMNode $context): ?CategoryCollection {
$out = new CategoryCollection;
foreach ($this->fetchString("dc:subject|dct:subject", null, true) ?? [] as $text) {
foreach ($this->fetchString("dc:subject|dct:subject", null, true, $context) ?? [] as $text) {
if (strlen($text)) {
$c = new Category;
$c->name = $text;
@ -429,9 +437,9 @@ abstract class Construct {
return count($out) ? $out : null;
}
protected function getCategoriesTunes(): ?CategoryCollection {
protected function getCategoriesTunes(\DOMNode $context): ?CategoryCollection {
$out = new CategoryCollection;
foreach ($this->xpath->query("apple:category", $this->subject) as $node) {
foreach ($this->xpath->query("apple:category", $context) as $node) {
$c = new Category;
$c->name = $this->trimText($node->getAttribute("text"));
if (strlen($c->name)) {
@ -448,9 +456,9 @@ abstract class Construct {
return count($out) ? $out : null;
}
protected function getCategoriesGPlay(): ?CategoryCollection {
protected function getCategoriesGPlay(\DOMNode $context): ?CategoryCollection {
$out = new CategoryCollection;
foreach ($this->xpath->query("gplay:category", $this->subject) as $node) {
foreach ($this->xpath->query("gplay:category", $context) as $node) {
$c = new Category;
$c->name = $this->trimText($node->getAttribute("text"));
if (strlen($c->name)) {

11
lib/Parser/XML/Entry.php

@ -141,7 +141,16 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
}
/** Returns the categories associated with the entry
 *
 * Categories attached to the entry itself take precedence; those of the entry's Atom <source> element, if it has one, are used only when the entry has none of its own
 *
 * @return CategoryCollection The categories found; the collection is empty when neither the entry nor its source yields any
 */
public function getCategories(): CategoryCollection {
    // first try to get categories from the entry itself
    $list = $this->getCategoriesFromNode($this->subject);
    if (!$list) {
        // if there are none, try to get some from the entry's Atom <source> element, if any
        $src = $this->fetchElement("atom:source");
        if ($src) {
            $list = $this->getCategoriesFromNode($src);
        }
    }
    // getCategoriesFromNode() yields null rather than an empty collection, so normalize here
    return $list ?? new CategoryCollection;
}
public function getEnclosures(): EnclosureCollection {

7
lib/Parser/XML/Feed.php

@ -191,12 +191,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
}
/** Returns the categories associated with the feed
 *
 * The lookup is delegated to getCategoriesFromNode() with the feed's own subject node as context, so that the primitives (which require a context node) are always called with one
 *
 * @return CategoryCollection The categories found; the collection is empty when the lookup yields nothing
 */
public function getCategories(): CategoryCollection {
    $list = $this->getCategoriesFromNode($this->subject);
    // a null result means no categories were found
    return $list ?? new CategoryCollection;
}
public function getPeople(): PersonCollection {

35
lib/Parser/XML/OldEntry.php

@ -1,35 +0,0 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\Parser\XML\Primitives;
use MensBeam\Lax\Parser\XML\XPath;
trait Entry {
    /** Primitive to fetch a collection of authors associated with an Atom entry
     *
     * This differs from feeds in that an entry's <source> element (which possibly contains metadata for the source feed) is checked for authors if the entry itself has none
     */
    protected function getAuthorsAtom() {
        return $this->fetchPeopleAtom("atom:author", "author")
            ?? $this->fetchPeopleAtom("atom:source[1]/atom:author", "author");
    }

    /** Primitive to fetch an RDF entry's canonical URL
     *
     * The URL is read from the rdf:about attribute of the subject node, which must be an <item> element in the namespace registered as either 'rss1' or 'rss0'; null is returned otherwise
     */
    protected function getUrlRss1() {
        // XPath doesn't seem to like the query we'd need for this, so it must be done the hard way.
        $node = $this->subject;
        // namespaces are compared strictly: they are either identical strings or they do not match
        $isRdfItem = $node->localName === "item"
            && ($node->namespaceURI === XPath::NS['rss1'] || $node->namespaceURI === XPath::NS['rss0']);
        if ($isRdfItem && $node->hasAttributeNS(XPath::NS['rdf'], "about")) {
            return $this->resolveNodeUrl($node, "about", XPath::NS['rdf']);
        }
        return null;
    }

    /** Primitive to fetch the modification date of an RSS 2.0 entry
     *
     * The date is read from the entry's pubDate element
     */
    protected function getDateModifiedRss2() {
        return $this->fetchDate("pubDate");
    }
}

34
tests/cases/XML/entry-atom.yaml

@ -249,3 +249,37 @@ Authors and contributors:
role: contributor
- name: Curly
role: contributor
Categories:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<category term="ook"/>
<category term="eek"/>
</entry>
<entry>
<source>
<category term="ook"/>
<category term="eek"/>
</source>
</entry>
<entry>
<category term="ack"/>
<source>
<category term="ook"/>
<category term="eek"/>
</source>
</entry>
</feed>
output:
format: atom
version: '1.0'
entries:
- categories:
- name: ook
- name: eek
- categories:
- name: ook
- name: eek
- categories:
- name: ack

28
tests/cases/XML/entry-mixed.yaml

@ -93,3 +93,31 @@ Entry author:
- people:
- name: 'Jane Doe'
role: author
Categories:
input: >
<rss><channel xmlns:atom="http://www.w3.org/2005/Atom">
<item>
<category>ook</category>
</item>
<item>
<atom:source>
<category>ook</category>
</atom:source>
</item>
<item>
<category>eek</category>
<atom:source>
<category>ook</category>
</atom:source>
</item>
</channel></rss>
output:
format: rss
entries:
- categories:
- name: ook
- categories:
- name: ook
- categories:
- name: eek

44
tests/cases/XML/entry-other.yaml

@ -102,3 +102,47 @@ Mixed podcast authors: # Google Play is arbitrarily preferred
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
iTunes categories:
input: >
<rss><channel xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<item>
<itunes:category text="Arts"/>
<itunes:category text="Arts ">
<itunes:category text=" Fashion &amp; Beauty"/>
</itunes:category>
<itunes:category text="Arts ">
<itunes:category text=" "/>
<itunes:category text="Books"/>
<itunes:category text="Design"/>
</itunes:category>
<itunes:category text=" "/>
<itunes:category>Bogus</itunes:category>
</item>
</channel></rss>
output:
format: rss
entries:
- categories:
- name: Arts
- name: Arts
subcategory: 'Fashion & Beauty'
- name: Arts
subcategory: Books
Google Play categories:
input: >
<rss><channel xmlns:play="http://www.google.com/schemas/play-podcasts/1.0">
<item>
<play:category text="Arts"/>
<play:category text=" "/>
<play:category>Bogus</play:category>
<play:category text="Music"/>
</item>
</channel></rss>
output:
format: rss
entries:
- categories:
- name: Arts
- name: Music

54
tests/cases/XML/entry-rss0.yaml

@ -1,54 +0,0 @@
Empty entry:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<channel>
<item/>
</channel>
<item/>
</rdf:RDF>
output:
format: rdf
version: '0.90'
Entry link:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<channel>
<item>
<link>http://example.com/</link>
</item>
</channel>
<item>
<link>http://example.com/</link>
</item>
</rdf:RDF>
output:
format: rdf
version: '0.90'
entries:
- link: 'http://example.com/'
- link: 'http://example.com/'
Entry title:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<item>
<title>Loose title</title>
</item>
</rdf:RDF>
output:
format: rdf
version: '0.90'
entries:
- title: {loose: 'Loose title'}
Entry content: # RSS 0.90 didn't have entry descriptions
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://channel.netscape.com/rdf/simple/0.9/">
<item>
<description>Bogus description</description>
</item>
</rdf:RDF>
output:
format: rdf
version: '0.90'

16
tests/cases/XML/entry-rss1.yaml

@ -260,3 +260,19 @@ Dublin Core creators and contributors:
role: contributor
- name: Curly
role: contributor
Categories by way of Dublin Core subjects:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item>
<dc:subject>Category, sort of</dc:subject>
<term:subject>blah</term:subject>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- categories:
- name: 'Category, sort of'
- name: 'blah'

17
tests/cases/XML/entry-rss2.yaml

@ -159,3 +159,20 @@ Entry author:
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
Categories:
input: >
<rss><channel>
<item>
<category>Category the first </category>
<category domain="ook eek">Category the second </category>
<category/>
</item>
</channel></rss>
output:
format: rss
entries:
- categories:
- name: 'Category the first'
- name: 'Category the second'
domain: 'ook eek'

Loading…
Cancel
Save