diff --git a/lib/Parser/Construct.php b/lib/Parser/Construct.php
index d74929f..f5c117e 100644
--- a/lib/Parser/Construct.php
+++ b/lib/Parser/Construct.php
@@ -34,14 +34,13 @@ trait Construct {
      * Accepts IDN hosts and Unicode localparts
      */
     protected function validateMail(string $addr): bool {
-        $out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);
-        if (!$out) {
+        if (!preg_match("/^(.+?)@([^@]+)$/", $addr, $match)) {
             return false;
         }
         $local = $match[1];
         $domain = $match[2];
         // PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first
-        $domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
+        $domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46);
         if ($domain !== false) {
             $addr = "$local@$domain";
             return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
diff --git a/lib/Parser/XML/Construct.php b/lib/Parser/XML/Construct.php
index 14f24c0..7224b05 100644
--- a/lib/Parser/XML/Construct.php
+++ b/lib/Parser/XML/Construct.php
@@ -146,6 +146,65 @@ abstract class Construct {
         return null;
     }
 
+    /** Finds and parses RSS person-texts and returns a collection of person objects, or null if none were found
+     *
+     * Each can have a name, e-mail address, or both; text with no valid address is used verbatim as the name
+     *
+     * The following forms will yield both a name and address:
+     *
+     * - user@example.com (Full Name)
+     * - Full Name <user@example.com>
+     * - Full Name <mailto:user@example.com>
+     */
+    protected function fetchPeople(string $query, string $role): ?PersonCollection {
+        $out = new PersonCollection;
+        foreach ($this->fetchString($query, ".+", true) ?? [] as $person) {
+            if (!strlen($person)) {
+                continue;
+            }
+            $p = new Person;
+            if (preg_match("/^([^@\s]+@\S+) \((.+?)\)$/", $person, $match)) { // tests "user@example.com (Full Name)" form
+                if ($this->validateMail($match[1])) {
+                    $p->name = trim($match[2]);
+                    $p->mail = $match[1];
+                } else {
+                    $p->name = $person;
+                }
+            } elseif (preg_match("/^((?:\S|\s(?!<))+) <(?:mailto:)?([^>]+)>$/", $person, $match)) { // tests "Full Name <user@example.com>" form, with optional "mailto:" prefix
+                if ($this->validateMail($match[2])) {
+                    $p->name = trim($match[1]);
+                    $p->mail = $match[2];
+                } else {
+                    $p->name = $person;
+                }
+            } elseif ($this->validateMail($person)) { // a bare address serves as both name and address
+                $p->name = $person;
+                $p->mail = $person;
+            } else {
+                $p->name = $person;
+            }
+            $p->role = $role;
+            $out[] = $p;
+        }
+        return count($out) ? $out : null;
+    }
+
+    /** Returns at most a single person (or null if no name is found): podcasts implicitly have only one author or webmaster */
+    protected function fetchPodPerson(string $prefix, string $role): ?PersonCollection {
+        assert(in_array($prefix, ["apple", "gplay"], true));
+        assert(in_array($role, ["author", "webmaster"], true));
+        $prefix = ($role === "webmaster") ? "$prefix:owner/$prefix" : $prefix; // webmaster details are looked up beneath the "<prefix>:owner" element
+        $out = new PersonCollection;
+        $p = new Person;
+        $p->name = $this->fetchString("$prefix:author", ".+") ?? ""; // NOTE(review): for webmasters this queries "<prefix>:owner/<prefix>:author"; confirm the owner element really has such a child
+        $p->mail = $this->fetchString("$prefix:email", "[^@]+@.+");
+        $p->role = $role;
+        if (strlen($p->name)) {
+            $out[] = $p;
+        }
+        return count($out) ? $out : null;
+    }
+
     /** Returns a node-list of Atom link elements with the desired relation or equivalents.
      *
      * Links without an href attribute are excluded.
@@ -262,6 +321,23 @@ abstract class Construct {
         return $populated ? $out : null;
     }
 
+    /** Finds and parses Atom person-constructs, and returns a collection of Person objects, or null if none has a name or e-mail address */
+    protected function fetchAtomPeople(string $query, string $role): ?PersonCollection {
+        $nodes = $this->xpath->query($query, $this->subject);
+        $out = new PersonCollection;
+        foreach ($nodes as $node) {
+            $p = new Person;
+            $p->mail = $this->fetchString("atom:email", $node); // NOTE(review): other fetchString() calls in this patch pass a pattern string as the second argument; confirm a context node is accepted here
+            $p->name = $this->fetchString("atom:name", $node) ?? $p->mail; // the address stands in for a missing name
+            $p->url = $this->fetchUrl("atom:uri", $node);
+            $p->role = $role;
+            if (strlen($p->name ?? "")) {
+                $out[] = $p;
+            }
+        }
+        return count($out) ? $out : null;
+    }
+
     /** Primitive to fetch an Atom feed/entry identifier */
     protected function getIdAtom(): ?string {
         return $this->fetchString("atom:id", ".+");
diff --git a/lib/Parser/XML/Feed.php b/lib/Parser/XML/Feed.php
index 28022d4..1c6d6b7 100644
--- a/lib/Parser/XML/Feed.php
+++ b/lib/Parser/XML/Feed.php
@@ -194,10 +194,26 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
     }
 
     public function getPeople(): PersonCollection {
-        $authors = $this->getAuthorsAtom() ?? $this->getAuthorsDC() ?? $this->getAuthorsPod() ?? $this->getAuthorsRss2() ?? new PersonCollection;
-        $contributors = $this->getContributorsAtom() ?? $this->getContributorsDC() ?? new PersonCollection;
-        $editors = $this->getEditorsRss2() ?? new PersonCollection;
-        $webmasters = $this->getWebmastersPod() ?? $this->getWebmastersRss2() ?? new PersonCollection;
+        $authors =
+            $this->fetchAtomPeople("atom:author", "author") // Atom authors
+            ?? $this->fetchPeople("dc:creator", "author") // Dublin Core creators
+            ?? $this->fetchPeople("rss2:author", "author") // RSS 2.0 authors
+            ?? $this->fetchPodPerson("gplay", "author") // Google Play author
+            ?? $this->fetchPodPerson("apple", "author") // iTunes author
+            ?? new PersonCollection;
+        $contributors =
+            $this->fetchAtomPeople("atom:contributor", "contributor") // Atom contributors
+            ?? $this->fetchPeople("dc:contributor", "contributor") // Dublin Core contributors
+            ?? new PersonCollection;
+        $editors =
+            $this->fetchPeople("rss2:managingEditor", "editor") // RSS 2.0 editors
+            ?? $this->fetchPeople("dc:publisher", "editor") // Dublin Core publishers
+            ?? new PersonCollection;
+        $webmasters =
+            $this->fetchPeople("rss2:webMaster", "webmaster") // RSS 2.0 webmasters
+            ?? $this->fetchPodPerson("gplay", "webmaster") // Google Play owner
+            ?? $this->fetchPodPerson("apple", "webmaster") // iTunes owner
+            ?? new PersonCollection;
         return $authors->merge($contributors, $editors, $webmasters);
     }
 