Browse Source

Add RSS/DC people collections and primary authors

master
J. King 6 years ago
parent
commit
25dffb8eb8
  1. 1
      composer.json
  2. 42
      lib/Collection.php
  3. 18
      lib/Person.php
  4. 60
      lib/PersonCollection.php
  5. 59
      lib/XMLCommon.php
  6. 22
      lib/XMLCommonPrimitives.php
  7. 36
      lib/XMLEntryPrimitives.php
  8. 9
      lib/XMLFeed.php
  9. 28
      lib/XMLFeedPrimitives.php

1
composer.json

@ -16,6 +16,7 @@
"php": "^7.0",
"ext-json": "*",
"ext-dom": "*",
"ext-intl": "*",
"sabre/uri": "^2.0"
},
"autoload": {

42
lib/Collection.php

@ -0,0 +1,42 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
/** Base class for containers which can be iterated, counted, and accessed with array syntax
 *
 * Members are held in the protected $data property; each concrete
 * collection decides which of its members is the "primary" one
 */
abstract class Collection implements \IteratorAggregate, \ArrayAccess, \Countable {
    /** The backing store for the collection's members */
    protected $data = [];

    /** Returns the primary member of the collection, as defined by the concrete class */
    abstract public function primary();

    /** Supplies the iterator used when the collection is traversed
     *
     * A traversable backing store is handed out directly; anything else is cast to an array and wrapped
     */
    public function getIterator(): \Traversable {
        if ($this->data instanceof \Traversable) {
            return $this->data;
        }
        return new \ArrayIterator((array) $this->data);
    }

    /** Reports the number of members in the collection */
    public function count(): int {
        return \count($this->data);
    }

    /** Reports whether a member is set (and not null) at the given offset */
    public function offsetExists($offset): bool {
        return isset($this->data[$offset]);
    }

    /** Retrieves the member stored at the given offset */
    public function offsetGet($offset) {
        return $this->data[$offset];
    }

    /** Stores a member at the given offset, or appends it when the offset is null */
    public function offsetSet($offset, $value) {
        if ($offset !== null) {
            $this->data[$offset] = $value;
            return;
        }
        // a null offset is what PHP passes for the append syntax: $collection[] = $value
        $this->data[] = $value;
    }

    /** Removes the member stored at the given offset */
    public function offsetUnset($offset) {
        unset($this->data[$offset]);
    }
}

18
lib/Person.php

@ -0,0 +1,18 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
/** A person associated with a feed or entry */
class Person {
    /** @var string The person's name */
    public $name = "";
    /** @var string The person's e-mail address */
    public $mail = "";
    /** @var string A URL associated with the person */
    public $url = "";
    /** @var string The person's role, e.g. "author", "contributor", "editor" or "webmaster" */
    public $role = "";

    /** Renders the person as a single string
     *
     * The following forms are produced:
     *
     * - "Full Name <user@example.com>" when both a name and a distinct address are known
     * - "user@example.com" when only an address is known, or the name is merely the address repeated
     * - "Full Name" when no address is known
     */
    public function __toString(): string {
        // cast defensively: the properties are public and untyped, and __toString must always yield a string
        $name = (string) $this->name;
        $mail = (string) $this->mail;
        if (!strlen($mail)) {
            return $name;
        }
        if (!strlen($name) || $name === $mail) {
            // avoids output such as "<a@b.c>" or "a@b.c <a@b.c>"
            return $mail;
        }
        return $name." <".$mail.">";
    }
}

60
lib/PersonCollection.php

@ -0,0 +1,60 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
/** A collection of Person objects which can be ranked and filtered by role */
class PersonCollection extends Collection {
    /** Weights of the known roles; any role not listed here weighs less than all of them */
    protected static $ranks = [
        'webmaster'   => 10,
        'editor'      => 20,
        'contributor' => 30,
        'author'      => 40,
    ];

    /** Returns the primary person of the collection
     *
     * The primary is the first member of the highest-weight role
     *
     * Roles are ranked thus:
     * author > contributor > editor > webmaster > (anything else)
     *
     * Null is returned if the collection is empty
     */
    public function primary() {
        $out = null;
        $best = 0;
        foreach ($this as $p) {
            // $ranks is a static property, so it must be addressed with a "$"; unranked roles weigh nothing
            $rank = static::$ranks[$p->role] ?? 0;
            // strict comparison keeps the earliest member among those of equal weight
            if ($out === null || $rank > $best) {
                $out = $p;
                $best = $rank;
            }
        }
        return $out;
    }

    /** Returns a collection filtered to include only the specified roles */
    public function filterRole(string ...$role): self {
        $out = new static;
        foreach ($this as $p) {
            if (in_array($p->role, $role, true)) {
                $out[] = $p;
            }
        }
        return $out;
    }

    /** Returns a collection filtered to exclude the specified roles */
    public function filterOutRole(string ...$role): self {
        $out = new static;
        foreach ($this as $p) {
            if (!in_array($p->role, $role, true)) {
                $out[] = $p;
            }
        }
        return $out;
    }
}

59
lib/XMLCommon.php

@ -64,7 +64,7 @@ abstract class XMLCommon {
}
/** Retrieves multiple element nodes based on an XPath query
 *
 * The query is passed to $this->xpath together with $this->subject as the context node
 */
protected function fetchElements(string $query) {
protected function fetchElements(string $query): \DOMNodeList {
return $this->xpath->query($query, $this->subject);
}
@ -142,4 +142,61 @@ abstract class XMLCommon {
$url = strlen($attr) ? $node->getAttributeNS($ns, $attr) : $this->trimText($node->textContent);
return $this->resolveURL($url, $base);
}
/** Parses an RSS person-text and returns a Person object with a name, e-mail address, or both
 *
 * The following forms will yield both a name and address:
 *
 * - user@example.com (Full Name)
 * - Full Name <user@example.com>
 *
 * A bare valid address is used as both name and address; any other
 * non-empty text is used as the name alone. Empty text yields a blank Person
 */
protected function parsePersonText(string $person): Person {
    $text = $this->trimText($person);
    $result = new Person;
    if (!strlen($text)) {
        return $result;
    }
    // unless one of the recognized forms matches with a valid address, the whole text serves as the name
    $result->name = $text;
    if (preg_match("/^([^@\s]+@\S+) \((.+?)\)$/", $text, $parts)) {
        // "user@example.com (Full Name)" form
        if ($this->validateMail($parts[1])) {
            $result->mail = $parts[1];
            $result->name = $parts[2];
        }
    } elseif (preg_match("/^((?:\S|\s(?!<))+) <([^>]+)>$/", $text, $parts)) {
        // "Full Name <user@example.com>" form
        if ($this->validateMail($parts[2])) {
            $result->mail = $parts[2];
            $result->name = $parts[1];
        }
    } elseif ($this->validateMail($text)) {
        // a bare address doubles as the name
        $result->mail = $text;
    }
    return $result;
}
/** Tests whether a string is a valid e-mail address
 *
 * Accepts IDN hosts and Unicode localparts
 */
protected function validateMail(string $addr): bool {
    // the host may not contain "@", so the address is split at its last "@"
    if (!preg_match("/^(.+?)@([^@]+)$/", $addr, $match)) {
        return false;
    }
    list(, $local, $host) = $match;
    // PHP's filter_var does not accept IDN hosts, so the host is first converted to its ASCII form
    $ascii = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
    if ($ascii === false) {
        return false;
    }
    // The constant is only defined in PHP 7.1 and above FIXME: Review if removing support for PHP 7.0
    if (defined("\FILTER_FLAG_EMAIL_UNICODE")) {
        $flags = \FILTER_FLAG_EMAIL_UNICODE;
    } else {
        $flags = 0;
    }
    return (bool) filter_var($local."@".$ascii, \FILTER_VALIDATE_EMAIL, $flags);
}
}

22
lib/XMLCommonPrimitives.php

@ -139,4 +139,26 @@ trait XMLCommonPrimitives {
protected function getIdDC() {
    // queries for the text of a dc:identifier child element
    $id = $this->fetchText("./dc:identifier");
    return $id;
}
/** Primitive to fetch a collection of people associated with a feed/entry via Dublin Core
 *
 * dc:creator elements yield people with the role "author", and
 * dc:contributor elements people with the role "contributor"
 *
 * Returns null when no person is found, including when the matching elements are all blank
 */
protected function getPeopleDC() {
    $nodes = $this->fetchElements("./dc:creator|./dc:contributor");
    if (!$nodes->length) {
        return null;
    }
    $out = new PersonCollection;
    // maps element names to the roles recorded on the resulting people
    $roles = [
        'creator'     => "author",
        'contributor' => "contributor",
    ];
    foreach ($nodes as $node) {
        $text = $this->trimText($node->textContent);
        // blank elements carry no information and are skipped
        if (strlen($text)) {
            $p = $this->parsePersonText($text);
            $p->role = $roles[$node->localName];
            $out[] = $p;
        }
    }
    // an empty collection is reported as null, consistent with the no-match case above
    return count($out) ? $out : null;
}
}

36
lib/XMLEntryPrimitives.php

@ -0,0 +1,36 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
/** Primitives for retrieving metadata of individual entries */
trait XMLEntryPrimitives {
    /** Primitive to fetch a collection of people associated with an RSS entry
     *
     * For RSS 2.0 this includes both native metadata and Dublin Core
     *
     * Returns null when no person is found, including when the matching elements are all blank
     */
    protected function getPeopleRss2() {
        $nodes = $this->fetchElements("./author|./dc:creator|./dc:contributor");
        if (!$nodes->length) {
            return null;
        }
        $out = new PersonCollection;
        // maps element names to the roles recorded on the resulting people
        $roles = [
            'author'      => "author",
            'creator'     => "author",
            'contributor' => "contributor",
        ];
        foreach ($nodes as $node) {
            $text = $this->trimText($node->textContent);
            // blank elements carry no information and are skipped
            if (strlen($text)) {
                $p = $this->parsePersonText($text);
                $p->role = $roles[$node->localName];
                $out[] = $p;
            }
        }
        // an empty collection is reported as null, consistent with the no-match case above
        return count($out) ? $out : null;
    }
}

9
lib/XMLFeed.php

@ -17,6 +17,8 @@ class XMLFeed extends XMLCommon {
public $title;
public $summary;
public $categories;
public $people;
public $author;
/** Returns a parsed feed */
public function __construct(string $data, string $contentType = null, string $url = null) {
@ -65,6 +67,8 @@ class XMLFeed extends XMLCommon {
$this->link = $this->getLink();
$this->title = $this->getTitle() ?? $this->link;
$this->summary = $this->getSummary();
$this->people = $this->getPeople();
$this->author = $this->people->primary();
}
/** General function to fetch the feed title */
@ -97,4 +101,9 @@ class XMLFeed extends XMLCommon {
public function getId() {
    // sources are consulted in order of preference; each later one is only tried if the former yields null
    $id = $this->getIdAtom();
    if ($id === null) {
        $id = $this->getIdDC();
    }
    if ($id === null) {
        $id = $this->getIdRss2();
    }
    return $id;
}
/** General function to fetch a collection of people associated with a feed
 *
 * An empty collection is returned when the RSS primitive yields null
 */
public function getPeople() {
    $people = $this->getPeopleRss2();
    if ($people === null) {
        $people = new PersonCollection;
    }
    return $people;
}
}

28
lib/XMLFeedPrimitives.php

@ -35,4 +35,32 @@ trait XMLFeedPrimitives {
protected function getSummaryPod() {
    // the summary/description elements take precedence; the subtitle is only a fallback
    $summary = $this->fetchText("./apple:summary|./gplay:description");
    if ($summary !== null) {
        return $summary;
    }
    return $this->fetchText("./apple:subtitle");
}
/** Primitive to fetch a collection of people associated with an RSS feed
 *
 * For RSS 2.0 this includes both native metadata and Dublin Core
 *
 * Returns null when no person is found, including when the matching elements are all blank
 */
protected function getPeopleRss2() {
    $nodes = $this->fetchElements("./managingEditor|./webMaster|./author|./dc:creator|./dc:contributor");
    if (!$nodes->length) {
        return null;
    }
    $out = new PersonCollection;
    // maps element names to the roles recorded on the resulting people
    $roles = [
        'managingEditor' => "editor",
        'webMaster'      => "webmaster",
        'author'         => "author",
        'creator'        => "author",
        'contributor'    => "contributor",
    ];
    foreach ($nodes as $node) {
        $text = $this->trimText($node->textContent);
        // blank elements carry no information and are skipped
        if (strlen($text)) {
            $p = $this->parsePersonText($text);
            $p->role = $roles[$node->localName];
            $out[] = $p;
        }
    }
    // an empty collection is reported as null, consistent with the no-match case above
    return count($out) ? $out : null;
}
}

Loading…
Cancel
Save