Browse Source

People fetching for entries

master
J. King 4 years ago
parent
commit
6c7db5e762
  1. 8
      lib/Parser/XML/Construct.php
  2. 29
      lib/Parser/XML/Entry.php
  3. 102
      tests/cases/XML/entry-atom.yaml
  4. 54
      tests/cases/XML/entry-mixed.yaml
  5. 54
      tests/cases/XML/entry-other.yaml
  6. 26
      tests/cases/XML/entry-rss1.yaml
  7. 19
      tests/cases/XML/entry-rss2.yaml

8
lib/Parser/XML/Construct.php

@ -159,9 +159,9 @@ abstract class Construct {
* - Full Name <user@example.com>
* - Full Name <mailto:user@example.com>
*/
protected function fetchPeople(string $query, string $role): ?PersonCollection {
protected function fetchPeople(string $query, string $role, ?\DOMNode $context = null): ?PersonCollection {
$out = new PersonCollection;
foreach ($this->fetchString($query, ".+", true) ?? [] as $person) {
foreach ($this->fetchString($query, ".+", true, $context) ?? [] as $person) {
$p = new Person;
if (preg_match("/^([^@\s]+@\S+) \((.+?)\)$/", $person, $match)) { // tests "user@example.com (Full Name)" form
if ($this->validateMail($match[1])) {
@ -304,8 +304,8 @@ abstract class Construct {
}
/** Finds and parses Atom person-constructs, and returns a collection of Person objects */
protected function fetchAtomPeople(string $query, string $role): ?PersonCollection {
$nodes = $this->xpath->query($query, $this->subject);
protected function fetchAtomPeople(string $query, string $role, \DOMNode $context = null): ?PersonCollection {
$nodes = $this->xpath->query($query, $context ?? $this->subject);
$out = new PersonCollection;
foreach ($nodes as $node) {
$p = new Person;

29
lib/Parser/XML/Entry.php

@ -123,7 +123,21 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
}
/** Collects the people associated with the entry: authors, then the entry's contributors, then any source contributors
 *
 * Authors and contributors are looked up on the entry itself first. Only
 * when the entry yields no authors is an Atom <source> child consulted, in
 * which case both the source's authors and the source's contributors are
 * used. Contributors found on the entry itself are always retained.
 */
public function getPeople(): PersonCollection {
    // first try getting authors and contributors in the entry itself
    $authors = $this->getAuthors($this->subject);
    $contributors = $this->getContributors($this->subject) ?? new PersonCollection;
    // contributors taken from an Atom <source> element; stays empty unless the source is consulted
    $srcContributors = new PersonCollection;
    // if there are no authors but there is an Atom <source> element, get both authors and contributors from the source
    if ($authors === null) {
        $authors = new PersonCollection;
        $src = $this->fetchElement("atom:source");
        if ($src) {
            $authors = $this->getAuthors($src) ?? $authors;
            $srcContributors = $this->getContributors($src) ?? $srcContributors;
        }
    }
    // merge all three lists
    return $authors->merge($contributors, $srcContributors);
}
public function getCategories(): CategoryCollection {
@ -166,4 +180,17 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
}
return null;
}
/** Looks up authors relative to the given context node, trying each supported vocabulary in turn
 *
 * Atom person constructs are tried first; after that the plain-text forms
 * are tried in a fixed order. The first lookup returning a non-null
 * collection wins and later lookups are not performed. Null is returned
 * when every lookup returns null.
 */
protected function getAuthors(\DOMNode $context): ?PersonCollection {
    // Atom authors
    $people = $this->fetchAtomPeople("atom:author", "author", $context);
    if ($people !== null) {
        return $people;
    }
    // plain-text author elements, in order of preference
    $queries = [
        "dc:creator|dct:creator", // Dublin Core creators
        "rss2:author",            // RSS 2.0 authors
        "gplay:author",           // Google Play authors
        "apple:author",           // iTunes authors
    ];
    foreach ($queries as $query) {
        $people = $this->fetchPeople($query, "author", $context);
        if ($people !== null) {
            return $people;
        }
    }
    return null;
}
/** Looks up contributors relative to the given context node
 *
 * Atom contributors take precedence; Dublin Core contributors are only
 * queried when the Atom lookup returns null. The result of the Dublin Core
 * lookup (possibly null) is returned as-is in that case.
 */
protected function getContributors(\DOMNode $context): ?PersonCollection {
    // Atom contributors
    $atomPeople = $this->fetchAtomPeople("atom:contributor", "contributor", $context);
    if ($atomPeople !== null) {
        return $atomPeople;
    }
    // Dublin Core contributors
    return $this->fetchPeople("dc:contributor|dct:contributor", "contributor", $context);
}
}

102
tests/cases/XML/entry-atom.yaml

@ -147,3 +147,105 @@ Content and summary:
summary: 'Plain summary'
- content: 'Plain content'
- summary: 'Plain summary'
Authors and contributors:
input: >
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author><name>John Doe</name></author>
</entry>
<entry>
<author><name>John Doe</name></author>
<author><name>Jane Doe</name></author>
</entry>
<entry>
<author><name>John Doe</name></author>
<contributor><name>Larry</name></contributor>
</entry>
<entry>
<author><name>John Doe</name></author>
<author><name>Jane Doe</name></author>
<contributor><name>Larry</name></contributor>
<contributor><name>Curly</name></contributor>
<contributor><name>Moe</name></contributor>
</entry>
<entry>
<author><name>John Doe</name></author>
<source>
<author><name>Jane Doe</name></author>
</source>
</entry>
<entry>
<author><name>John Doe</name></author>
<source>
<contributor><name>Larry</name></contributor>
</source>
</entry>
<entry>
<source>
<author><name>Jane Doe</name></author>
</source>
</entry>
<entry>
<source>
<author><name>Jane Doe</name></author>
<contributor><name>Curly</name></contributor>
</source>
</entry>
<entry>
<source>
<author><name>Jane Doe</name></author>
<contributor><name>Curly</name></contributor>
</source>
<contributor><name>Larry</name></contributor>
</entry>
</feed>
output:
format: atom
version: '1.0'
entries:
- people:
- name: 'John Doe'
role: author
- people:
- name: 'John Doe'
role: author
- name: 'Jane Doe'
role: author
- people:
- name: 'John Doe'
role: author
- name: Larry
role: contributor
- people:
- name: 'John Doe'
role: author
- name: 'Jane Doe'
role: author
- name: Larry
role: contributor
- name: Curly
role: contributor
- name: Moe
role: contributor
- people:
- name: 'John Doe'
role: author
- people:
- name: 'John Doe'
role: author
- people:
- name: 'Jane Doe'
role: author
- people:
- name: 'Jane Doe'
role: author
- name: Curly
role: contributor
- people:
- name: 'Jane Doe'
role: author
- name: Larry
role: contributor
- name: Curly
role: contributor

54
tests/cases/XML/entry-mixed.yaml

@ -39,3 +39,57 @@ Update and creation dates:
- dateCreated: '2020-03-03T00:00:00Z'
dateModified: '2020-01-01T00:00:00Z'
- dateModified: '2020-03-03T01:00:00+01:00'
Entry author:
input: >
<rss><channel xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<item>
<dc:creator>jane.doe@example.com (Jane Doe)</dc:creator>
<author>john.doe@example.com (John Doe)</author>
</item>
<item>
<dc:contributor>Larry</dc:contributor>
<author>john.doe@example.com (John Doe)</author>
</item>
<item>
<dc:contributor>Larry</dc:contributor>
<dc:contributor>Curly</dc:contributor>
<atom:source>
<author>jane.doe@example.com (Jane Doe)</author>
<dc:contributor>Moe</dc:contributor>
</atom:source>
</item>
<item>
<author>Jane Doe</author>
<atom:source>
<atom:author><atom:name>John Doe</atom:name></atom:author>
<dc:contributor>Moe</dc:contributor>
</atom:source>
</item>
</channel></rss>
output:
format: rss
entries:
- people:
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- people:
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
- name: Larry
role: contributor
- people:
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: Larry
role: contributor
- name: Curly
role: contributor
- name: Moe
role: contributor
- people:
- name: 'Jane Doe'
role: author

54
tests/cases/XML/entry-other.yaml

@ -48,3 +48,57 @@ Google Play summary:
format: rss
entries:
- summary: 'Plain summary'
iTunes authors:
input: >
<rss><channel xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<item>
<itunes:author>jane.doe@example.com (Jane Doe)</itunes:author>
<itunes:author>john.doe@example.com (John Doe)</itunes:author>
</item>
</channel></rss>
output:
format: rss
entries:
- people:
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
Google Play authors:
input: >
<rss><channel xmlns:play="http://www.google.com/schemas/play-podcasts/1.0">
<item>
<play:author>jane.doe@example.com (Jane Doe)</play:author>
<play:author>john.doe@example.com (John Doe)</play:author>
</item>
</channel></rss>
output:
format: rss
entries:
- people:
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
Mixed podcast authors: # Google Play is arbitrarily preferred
input: >
<rss><channel xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:play="http://www.google.com/schemas/play-podcasts/1.0">
<item>
<itunes:author>jane.doe@example.com (Jane Doe)</itunes:author>
<play:author>john.doe@example.com (John Doe)</play:author>
</item>
</channel></rss>
output:
format: rss
entries:
- people:
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author

26
tests/cases/XML/entry-rss1.yaml

@ -234,3 +234,29 @@ Dublin Core summary:
- summary: 'Plain summary'
- summary: 'Plain summary'
- summary: 'Abstract'
Dublin Core creators and contributors:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item>
<dc:creator>john.doe@example.com (John Doe)</dc:creator>
<term:creator>jane.doe@example.com (Jane Doe)</term:creator>
<dc:contributor>Larry</dc:contributor>
<term:contributor>Curly</term:contributor>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- people:
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: Larry
role: contributor
- name: Curly
role: contributor

19
tests/cases/XML/entry-rss2.yaml

@ -140,3 +140,22 @@ Entry content:
format: rss
entries:
- content: {loose: 'Loose content'}
Entry author:
input: >
<rss><channel>
<item>
<author>jane.doe@example.com (Jane Doe)</author>
<author>john.doe@example.com (John Doe)</author>
</item>
</channel></rss>
output:
format: rss
entries:
- people:
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author

Loading…
Cancel
Save