Browse Source

Prototype people fetching

master
J. King 4 years ago
parent
commit
23bf65f206
  1. 5
      lib/Parser/Construct.php
  2. 76
      lib/Parser/XML/Construct.php
  3. 24
      lib/Parser/XML/Feed.php

5
lib/Parser/Construct.php

@ -34,14 +34,13 @@ trait Construct {
* Accepts IDN hosts and Unicode localparts
*/
protected function validateMail(string $addr): bool {
$out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);
if (!$out) {
if (!preg_match("/^(.+?)@([^@]+)$/", $addr, $match)) {
return false;
}
$local = $match[1];
$domain = $match[2];
// PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first
$domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
$domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46);
if ($domain !== false) {
$addr = "$local@$domain";
return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);

76
lib/Parser/XML/Construct.php

@ -146,6 +146,65 @@ abstract class Construct {
return null;
}
/** Finds RSS-style person texts and turns each into a Person with the given role
 *
 * Every non-empty string yielded by the query becomes one person. A person
 * always receives a name and receives an e-mail address only when one is
 * recognized and passes validateMail().
 *
 * These forms produce both a name and an address:
 *
 * - user@example.com (Full Name)
 * - Full Name <user@example.com>
 * - Full Name <mailto:user@example.com>
 *
 * A text which is itself a valid address is used as both name and address.
 * Anything else (including one of the above forms with an invalid address)
 * is kept verbatim as the name.
 *
 * Returns null instead of an empty collection when no people were found.
 */
protected function fetchPeople(string $query, string $role): ?PersonCollection {
    $people = new PersonCollection;
    $texts = $this->fetchString($query, ".+", true) ?? [];
    foreach ($texts as $text) {
        if (strlen($text) === 0) {
            continue;
        }
        $entry = new Person;
        // start from the fallback (whole text as the name, no address) and refine from there
        $name = $text;
        $mail = null;
        if (preg_match("/^([^@\s]+@\S+) \((.+?)\)$/", $text, $parts)) {
            // "user@example.com (Full Name)" form
            if ($this->validateMail($parts[1])) {
                $name = trim($parts[2]);
                $mail = $parts[1];
            }
        } elseif (preg_match("/^((?:\S|\s(?!<))+) <(?:mailto:)?([^>]+)>$/", $text, $parts)) {
            // "Full Name <user@example.com>" form, with optional "mailto:" prefix
            if ($this->validateMail($parts[2])) {
                $name = trim($parts[1]);
                $mail = $parts[2];
            }
        } elseif ($this->validateMail($text)) {
            // a bare address serves as its own name
            $mail = $text;
        }
        $entry->name = $name;
        if ($mail !== null) {
            // the address is only ever assigned when one was accepted
            $entry->mail = $mail;
        }
        $entry->role = $role;
        $people[] = $entry;
    }
    return count($people) ? $people : null;
}
/** Returns at most a single person: podcasts implicitly have only one author or webmaster
 *
 * $prefix must be "apple" or "gplay" and $role must be "author" or "webmaster";
 * both are checked by assertion only. For the "webmaster" role the lookups are
 * made beneath the "<prefix>:owner" element rather than directly.
 *
 * The person is only returned if a non-empty name was found; otherwise the
 * result is null, even if an e-mail address was matched.
 */
protected function fetchPodPerson(string $prefix, string $role): ?PersonCollection {
    // strict comparison: only these exact strings are acceptable
    assert(in_array($prefix, ["apple", "gplay"], true));
    assert(in_array($role, ["author", "webmaster"], true));
    // NOTE(review): for webmasters this makes the name query "<prefix>:owner/<prefix>:author";
    // confirm that the owner element really carries an "author" child in both vocabularies
    $prefix = ($role === "webmaster") ? "$prefix:owner/$prefix" : $prefix;
    $out = new PersonCollection;
    $p = new Person;
    $p->name = $this->fetchString("$prefix:author", ".+") ?? "";
    // NOTE(review): unlike fetchPeople() the address is only pattern-matched here, not passed through validateMail()
    $p->mail = $this->fetchString("$prefix:email", "[^@]+@.+");
    $p->role = $role;
    if (strlen($p->name)) {
        $out[] = $p;
    }
    return count($out) ? $out : null;
}
/** Returns a node-list of Atom link elements with the desired relation or equivalents.
*
* Links without an href attribute are excluded.
@ -262,6 +321,23 @@ abstract class Construct {
return $populated ? $out : null;
}
/** Finds and parses Atom person-constructs, and returns a collection of Person objects
 *
 * The query is run with $this->subject as its context argument, and each
 * resulting node yields one candidate person carrying the supplied role.
 * When the name lookup yields null the e-mail address is used as the name
 * instead; candidates left with a null or empty name are skipped.
 *
 * Returns null instead of an empty collection when no people were found.
 */
protected function fetchAtomPeople(string $query, string $role): ?PersonCollection {
    $nodes = $this->xpath->query($query, $this->subject);
    $out = new PersonCollection;
    foreach ($nodes as $node) {
        $p = new Person;
        // NOTE(review): other callers pass a pattern string (e.g. ".+") as the second argument of
        // fetchString(); here the context node is passed in that position. Confirm against the
        // signature of fetchString() that this is accepted
        $p->mail = $this->fetchString("atom:email", $node);
        $p->name = $this->fetchString("atom:name", $node) ?? $p->mail;
        $p->url = $this->fetchUrl("atom:uri", $node);
        $p->role = $role;
        if (strlen($p->name ?? "")) {
            $out[] = $p;
        }
    }
    return count($out) ? $out : null;
}
/** Primitive to fetch an Atom feed/entry identifier */
protected function getIdAtom(): ?string {
return $this->fetchString("atom:id", ".+");

24
lib/Parser/XML/Feed.php

@ -194,10 +194,26 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
}
/** Collects the authors, contributors, editors, and webmasters of the feed into one collection
 *
 * For each role the sources are tried in the order listed and the first one
 * which yields a non-null result is used; an empty collection stands in when
 * none does. The four per-role collections are then merged, authors first.
 */
public function getPeople(): PersonCollection {
// NOTE(review): the next four assignments are each overwritten by the assignments
// which follow them; they look like the pre-change lines of the diff this text was
// taken from. Confirm and remove them if so
$authors = $this->getAuthorsAtom() ?? $this->getAuthorsDC() ?? $this->getAuthorsPod() ?? $this->getAuthorsRss2() ?? new PersonCollection;
$contributors = $this->getContributorsAtom() ?? $this->getContributorsDC() ?? new PersonCollection;
$editors = $this->getEditorsRss2() ?? new PersonCollection;
$webmasters = $this->getWebmastersPod() ?? $this->getWebmastersRss2() ?? new PersonCollection;
$authors =
$this->fetchAtomPeople("atom:author", "author") // Atom authors
?? $this->fetchPeople("dc:creator", "author") // Dublin Core creators
?? $this->fetchPeople("rss2:author", "author") // RSS 2.0 authors
?? $this->fetchPodPerson("gplay", "author") // Google Play author
?? $this->fetchPodPerson("apple", "author") // iTunes author
?? new PersonCollection;
$contributors =
$this->fetchAtomPeople("atom:contributor", "contributor") // Atom contributors
?? $this->fetchPeople("dc:contributor", "contributor") // Dublin Core contributors
?? new PersonCollection;
$editors =
$this->fetchPeople("rss2:managingEditor", "editor") // RSS 2.0 managing editors
?? $this->fetchPeople("dc:publisher", "editor") // Dublin Core publishers, treated as editors
?? new PersonCollection;
$webmasters =
$this->fetchPeople("rss2:webMaster", "webmaster") // RSS 2.0 webmasters
?? $this->fetchPodPerson("gplay", "webmaster") // Google Play webmaster
?? $this->fetchPodPerson("apple", "webmaster") // iTunes webmaster
?? new PersonCollection;
return $authors->merge($contributors, $editors, $webmasters);
}

Loading…
Cancel
Save