Browse Source

Simplify XPath queries

master
J. King 6 years ago
parent
commit
3c1d9c76ef
  1. 8
      lib/XML/Construct.php
  2. 6
      lib/XML/Feed.php
  3. 40
      lib/XML/Primitives/Construct.php
  4. 10
      lib/XML/Primitives/Entry.php
  5. 18
      lib/XML/Primitives/Feed.php
  6. 77
      lib/XML/XMLFeedPrimitives.php

8
lib/XML/Construct.php

@ -92,7 +92,7 @@ trait Construct {
} else {
$cond = "@rel='$rel'";
}
return $this->xpath->query("./atom:link[@href][$cond]", $this->subject);
return $this->xpath->query("atom:link[@href][$cond]", $this->subject);
}
/** Resolves a URL contained in a DOM element's attribute or text
@ -110,12 +110,12 @@ trait Construct {
/** Populates a Person object according to the children of an Atom <author> element */
protected function parsePersonAtom(\DOMNode $node) {
$p = new Person;
$p->mail = $this->fetchText("./atom:email", $node) ?? "";
$p->name = $this->fetchText("./atom:name", $node) ?? $p->mail;
$p->mail = $this->fetchText("atom:email", $node) ?? "";
$p->name = $this->fetchText("atom:name", $node) ?? $p->mail;
if (!strlen($p->name)) {
return null;
}
$url = $this->fetchElement("./atom:uri", $node);
$url = $this->fetchElement("atom:uri", $node);
if ($url) {
$p->url = $this->resolveNodeUrl($url);
}

6
lib/XML/Feed.php

@ -31,17 +31,17 @@ class Feed extends \JKingWeb\Lax\Feed {
$ns = $this->subject->namespaceURI;
$name = $this->subject->localName;
if (is_null($ns) && $name=="rss") {
$this->subject = $this->fetchElement("./channel") ?? $this->subject;
$this->subject = $this->fetchElement("channel") ?? $this->subject;
$this->type = "rss";
$this->version = $this->document->documentElement->getAttribute("version");
} elseif ($ns==XPath::NS['rdf'] && $name=="RDF") {
$this->type = "rdf";
$channel = $this->fetchElement("./rss1:channel|./rss0:channel");
$channel = $this->fetchElement("rss1:channel|rss0:channel");
if ($channel) {
$this->subject = $channel;
$this->version = ($channel->namespaceURI==XPath::NS['rss1']) ? "1.0" : "0.90";
} else {
$element = $this->fetchElement("./rss1:item|./rss0:item|./rss1:image|./rss0:image");
$element = $this->fetchElement("rss1:item|rss0:item|rss1:image|rss0:image");
if ($element) {
$this->version = ($element->namespaceURI==XPath::NS['rss1']) ? "1.0" : "0.90";
}

40
lib/XML/Primitives/Construct.php

@ -16,27 +16,27 @@ trait Construct {
* This fetches the title in plain text rather than HTML, even if HTML is provided in the feed/entry
*/
protected function getTitleAtom() {
return $this->fetchTextAtom("./atom:title");
return $this->fetchTextAtom("atom:title");
}
/** Primitive to fetch an RSS feed/entry title */
protected function getTitleRss2() {
return $this->fetchText("./title");
return $this->fetchText("title");
}
/** Primitive to fetch an RDF feed/entry title */
protected function getTitleRss1() {
return $this->fetchText("./rss1:title|./rss0:title");
return $this->fetchText("rss1:title|rss0:title");
}
/** Primitive to fetch a Dublin Core feed/entry title */
protected function getTitleDC() {
return $this->fetchText("./dc:title");
return $this->fetchText("dc:title");
}
/** Primitive to fetch an Apple podcast/episode title */
protected function getTitlePod() {
return $this->fetchText("./apple:title");
return $this->fetchText("apple:title");
}
/** Primitive to fetch an Atom feed/entry Web-representation URL */
@ -48,19 +48,19 @@ trait Construct {
/** Primitive to fetch an RSS feed/entry Web-representation URL */
protected function getLinkRss2() {
$node = $this->fetchElement("./link");
$node = $this->fetchElement("link");
return $node ? $this->resolveNodeUrl($node) : null;
}
/** Primitive to fetch an RDF feed/entry Web-representation URL */
protected function getLinkRss1() {
$node = $this->fetchElement("./rss1:link|./rss0:link");
$node = $this->fetchElement("rss1:link|rss0:link");
return $node ? $this->resolveNodeUrl($node) : null;
}
/** Primitive to fetch Atom feed/entry categories */
protected function getCategoriesAtom(bool $grouped = false, bool $humanFriendly = true) {
$nodes = $this->fetchElements("./atom:category[@term]");
$nodes = $this->fetchElements("atom:category[@term]");
$out = [];
foreach ($nodes as $node) {
$scheme = $node->getAttribute("scheme");
@ -78,7 +78,7 @@ trait Construct {
/** Primitive to fetch RSS feed/entry categories */
protected function getCategoriesRss2(bool $grouped = false, bool $humanFriendly = true) {
if ($grouped) {
$nodes = $this->fetchElements("./category");
$nodes = $this->fetchElements("category");
$out = [];
foreach ($nodes as $node) {
$domain = $node->getAttribute("domain");
@ -92,7 +92,7 @@ trait Construct {
}
return $out ? $out : null;
} else {
$out = $this->fetchTextMulti("./category");
$out = $this->fetchTextMulti("category");
return $out ? array_keys(array_flip($out)) : null;
}
}
@ -102,7 +102,7 @@ trait Construct {
* Dublin Core doesn't have an obvious category type, so we use 'subject' as a nearest approximation
*/
protected function getCategoriesDC(bool $grouped = false, bool $humanFriendly = true) {
$out = $this->fetchTextMulti("./dc:subject");
$out = $this->fetchTextMulti("dc:subject");
if ($out) {
$out = array_keys(array_flip($out));
return $grouped ? ['' => $out] : $out;
@ -112,7 +112,7 @@ trait Construct {
/** Primitive to fetch podcast/episode categories */
protected function getCategoriesPod(bool $grouped = false, bool $humanFriendly = true) {
$nodes = $this->fetchElements("./apple:category|./gplay:category");
$nodes = $this->fetchElements("apple:category|gplay:category");
$out = [];
foreach ($nodes as $node) {
$cat = $this->trimText($node->getAttribute("text"));
@ -127,7 +127,7 @@ trait Construct {
/** Primitive to fetch an Atom feed/entry identifier */
protected function getIdAtom() {
return $this->fetchText("./atom:id");
return $this->fetchText("atom:id");
}
/** Primitive to fetch an RSS feed/entry identifier
@ -135,17 +135,17 @@ trait Construct {
* Using RSS' <guid> for feed identifiers is non-standard, but harmless
*/
protected function getIdRss2() {
return $this->fetchText("./guid");
return $this->fetchText("guid");
}
/** Primitive to fetch a Dublin Core feed/entry identifier */
protected function getIdDC() {
return $this->fetchText("./dc:identifier");
return $this->fetchText("dc:identifier");
}
/** Primitive to fetch a collection of people associated with a feed/entry via Dublin Core */
protected function getPeopleDC() {
$nodes = $this->fetchElements("./dc:creator|./dc:contributor");
$nodes = $this->fetchElements("dc:creator|dc:contributor");
if (!$nodes->length) {
return null;
}
@ -170,8 +170,8 @@ trait Construct {
* The collection only ever contains the first author found: podcasts implicitly have only one author
*/
protected function getPeoplePod() {
$name = $this->fetchText("./gplay:author|./apple:author") ?? "";
$mail = $this->fetchText("./gplay:email|./apple:email") ?? "";
$name = $this->fetchText("gplay:author|apple:author") ?? "";
$mail = $this->fetchText("gplay:email|apple:email") ?? "";
if (!strlen($name)) {
return null;
}
@ -192,11 +192,11 @@ trait Construct {
/** Primitive to fetch the modification date of an Atom feed/entry */
protected function getDateModifiedAtom() {
return $this->fetchDate("./atom:updated");
return $this->fetchDate("atom:updated");
}
/** Primitive to fetch the modification date of a Dublin Core feed/entry */
protected function getDateModifiedDC() {
return $this->fetchDate("./dc:date");
return $this->fetchDate("dc:date");
}
}

10
lib/XML/Primitives/Entry.php

@ -13,7 +13,7 @@ trait Entry {
/** Primitive to fetch a collection of people associated with an RSS entry */
protected function getPeopleRss2() {
$nodes = $this->fetchElements("./author");
$nodes = $this->fetchElements("author");
if (!$nodes->length) {
return null;
}
@ -31,7 +31,7 @@ trait Entry {
/** Primitive to fetch a collection of people associated with an Atom entry */
protected function getPeopleAtom() {
$nodes = $this->fetchElements("./atom:author|./atom:contributor");
$nodes = $this->fetchElements("atom:author|atom:contributor");
$out = new PersonCollection;
foreach ($nodes as $node) {
$p = $this->parsePersonAtom($node);
@ -42,7 +42,7 @@ trait Entry {
$primary = $out->primary();
// if the entry has no author, we retrieve the authors (and not contributors) from the entry's source element
if (!$primary || $primary->role != "author") {
$nodes = $this->fetchElements("./atom:source[1]/atom:author");
$nodes = $this->fetchElements("atom:source[1]/atom:author");
foreach ($nodes as $node) {
$p = $this->parsePersonAtom($node);
if ($p) {
@ -70,11 +70,11 @@ trait Entry {
/** Primitive to fetch the modification date of an RSS entry */
protected function getDateModifiedRss2() {
return $this->fetchDate("./pubDate");
return $this->fetchDate("pubDate");
}
/** Primitive to fetch the creation date of an Atom entry */
protected function getDateCreatedAtom() {
return $this->fetchDate("./atom:published");
return $this->fetchDate("atom:published");
}
}

18
lib/XML/Primitives/Feed.php

@ -16,32 +16,32 @@ trait Feed {
* Atom does not have a 'description' element like the RSSes, but it does have 'subtitle', which fills roughly the same function
*/
protected function getSummaryAtom() {
return $this->fetchTextAtom("./atom:subtitle");
return $this->fetchTextAtom("atom:subtitle");
}
/** Primitive to fetch an RSS feed summary */
protected function getSummaryRss2() {
return $this->fetchText("./description");
return $this->fetchText("description");
}
/** Primitive to fetch an RDF feed summary */
protected function getSummaryRss1() {
return $this->fetchText("./rss1:description|./rss0:description");
return $this->fetchText("rss1:description|rss0:description");
}
/** Primitive to fetch a Dublin Core feed summary */
protected function getSummaryDC() {
return $this->fetchText("./dc:description");
return $this->fetchText("dc:description");
}
/** Primitive to fetch a podcast summary */
protected function getSummaryPod() {
return $this->fetchText("./apple:summary|./gplay:description") ?? $this->fetchText("./apple:subtitle");
return $this->fetchText("apple:summary|gplay:description") ?? $this->fetchText("apple:subtitle");
}
/** Primitive to fetch a collection of people associated with an RSS feed */
protected function getPeopleRss2() {
$nodes = $this->fetchElements("./managingEditor|./webMaster|./author");
$nodes = $this->fetchElements("managingEditor|webMaster|author");
if (!$nodes->length) {
return null;
}
@ -64,7 +64,7 @@ trait Feed {
/** Primitive to fetch a collection of people associated with an Atom feed */
protected function getPeopleAtom() {
$nodes = $this->fetchElements("./atom:author|./atom:contributor");
$nodes = $this->fetchElements("atom:author|atom:contributor");
if (!$nodes->length) {
return null;
}
@ -95,7 +95,7 @@ trait Feed {
/** Primitive to fetch a podcast's canonical URL */
protected function getUrlPod() {
$node = $this->fetchElement("./apple:new-feed-url");
$node = $this->fetchElement("apple:new-feed-url");
if ($node) {
return $this->resolveNodeUrl($node);
} else {
@ -105,6 +105,6 @@ trait Feed {
/** Primitive to fetch the modification date of an RSS feed */
protected function getDateModifiedRss2() {
return $this->fetchDate("./lastBuildDate") ?? $this->fetchDate("./pubDate");
return $this->fetchDate("lastBuildDate") ?? $this->fetchDate("pubDate");
}
}

77
lib/XML/XMLFeedPrimitives.php

@ -0,0 +1,77 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax\XML;
trait XMLFeedPrimitives {
    /** Atom feed summary primitive
     *
     * Atom lacks the 'description' element found in the RSS family; its 'subtitle' element
     * serves roughly the same purpose, so that is what gets read here
     */
    protected function getSummaryAtom() {
        $subtitle = $this->fetchTextAtom("atom:subtitle");
        return $subtitle;
    }

    /** RSS feed summary primitive, read from the child 'description' element */
    protected function getSummaryRss2() {
        $description = $this->fetchText("description");
        return $description;
    }

    /** RDF feed summary primitive, read from a 'description' child in either the rss1 or rss0 namespace */
    protected function getSummaryRss1() {
        $description = $this->fetchText("rss1:description|rss0:description");
        return $description;
    }

    /** Dublin Core feed summary primitive, read from the child 'dc:description' element */
    protected function getSummaryDC() {
        $description = $this->fetchText("dc:description");
        return $description;
    }

    /** Podcast summary primitive
     *
     * The 'apple:summary' / 'gplay:description' query is tried first; 'apple:subtitle'
     * is only consulted when that first query yields null
     */
    protected function getSummaryPod() {
        $summary = $this->fetchText("apple:summary|gplay:description");
        if ($summary !== null) {
            return $summary;
        }
        return $this->fetchText("apple:subtitle");
    }

    /** Collects the people named by an RSS feed
     *
     * Looks at the 'managingEditor', 'webMaster' and 'author' children. Returns null when
     * no such element exists; otherwise returns a PersonCollection holding one person for
     * every element whose trimmed text is non-empty
     */
    protected function getPeopleRss2() {
        // maps each element name matched by the query below to the role recorded on the person
        $roleByElement = [
            'managingEditor' => "editor",
            'webMaster'      => "webmaster",
            'author'         => "author",
        ];
        $candidates = $this->fetchElements("managingEditor|webMaster|author");
        if ($candidates->length) {
            $people = new PersonCollection;
            foreach ($candidates as $element) {
                $content = $this->trimText($element->textContent);
                if (!strlen($content)) {
                    // nothing to parse in this element
                    continue;
                }
                $person = $this->parsePersonText($content);
                $person->role = $roleByElement[$element->localName];
                $people[] = $person;
            }
            return $people;
        }
        return null;
    }

    /** Collects the people named by an Atom feed
     *
     * Looks at the 'atom:author' and 'atom:contributor' children. Returns null when no such
     * element exists; otherwise returns a PersonCollection of every person that
     * parsePersonAtom() was able to produce
     */
    protected function getPeopleAtom() {
        $candidates = $this->fetchElements("atom:author|atom:contributor");
        if (!$candidates->length) {
            return null;
        }
        $people = new PersonCollection;
        foreach ($candidates as $element) {
            $person = $this->parsePersonAtom($element);
            if (!$person) {
                // the parser produced no person for this element
                continue;
            }
            $people[] = $person;
        }
        return $people;
    }
}
Loading…
Cancel
Save