A lax Web news feed parser
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

121 lines
4.1 KiB

<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
/** Primitives shared by feed-level and entry-level XML parsers
 *
 * Each primitive extracts one datum (title, link, categories) using the
 * vocabulary of one format: Atom, RSS 2.0, RDF (RSS 1.0/0.90), Dublin Core, or
 * Apple podcast extensions.
 *
 * The helper methods used here (fetchText, fetchTextAtom, fetchTextMulti,
 * fetchElements, fetchAtomRelations, resolveURL and trimText) are not defined
 * in this trait and must be supplied by the class which uses it.
 *
 * All category primitives share the same signature and the same result shape:
 * - `null` when no categories are found
 * - a de-duplicated list of strings when `$grouped` is false
 * - a map of group name to de-duplicated list of strings when `$grouped` is true
 */
trait XMLCommonPrimitives {
    /** Primitive to fetch an Atom feed/entry title
     *
     * This fetches the title in plain text rather than HTML, even if HTML is provided in the feed/entry
     */
    protected function getTitleAtom() {
        return $this->fetchTextAtom("./atom:title");
    }

    /** Primitive to fetch an RSS feed/entry title */
    protected function getTitleRss2() {
        return $this->fetchText("./title");
    }

    /** Primitive to fetch an RDF feed/entry title
     *
     * Both the "rss1" and "rss0" namespace prefixes are queried
     */
    protected function getTitleRss1() {
        return $this->fetchText("./rss1:title|./rss0:title");
    }

    /** Primitive to fetch a Dublin Core feed/entry title */
    protected function getTitleDC() {
        return $this->fetchText("./dc:title");
    }

    /** Primitive to fetch an Apple podcast/episode title */
    protected function getTitleApple() {
        return $this->fetchText("./apple:title");
    }

    /** Primitive to fetch an Atom feed/entry Web-representation URL
     *
     * The first node returned by fetchAtomRelations() is used; its "href"
     * attribute is resolved against that node's base URI. Null is returned
     * when there is no such node
     */
    protected function getLinkAtom() {
        $relations = $this->fetchAtomRelations();
        if (!$relations->length) {
            return null;
        }
        $link = $relations->item(0);
        return $this->resolveURL($link->getAttribute("href"), $link->baseURI);
    }

    /** Primitive to fetch an RSS feed/entry Web-representation URL */
    protected function getLinkRss2() {
        return $this->fetchText("./link");
    }

    /** Primitive to fetch an RDF feed/entry Web-representation URL
     *
     * Both the "rss1" and "rss0" namespace prefixes are queried
     */
    protected function getLinkRss1() {
        return $this->fetchText("./rss1:link|./rss0:link");
    }

    /** Primitive to fetch Atom feed/entry categories
     *
     * Only category elements with a "term" attribute are considered.
     *
     * @param bool $grouped Whether to group categories by their "scheme" attribute. When true the result is a map of scheme to list of categories; when false the result is a flat list, de-duplicated across all schemes
     * @param bool $humanFriendly Whether to prefer the "label" attribute over the "term" attribute when a label is present
     * @return array|null The categories, or null if there are none
     */
    protected function getCategoriesAtom(bool $grouped = false, bool $humanFriendly = true) {
        $out = [];
        foreach ($this->fetchElements("./atom:category[@term]") as $node) {
            $cat = ($humanFriendly && $node->hasAttribute("label")) ? $node->getAttribute("label") : $node->getAttribute("term");
            if ($grouped) {
                $scheme = $node->getAttribute("scheme");
                // the group must be created explicitly: reading a missing key raises a notice/warning
                if (!isset($out[$scheme])) {
                    $out[$scheme] = [];
                }
                // strict comparison keeps numeric-looking strings like "01" and "1" distinct
                if (!in_array($cat, $out[$scheme], true)) {
                    $out[$scheme][] = $cat;
                }
            } elseif (!in_array($cat, $out, true)) {
                $out[] = $cat;
            }
        }
        return $out ? $out : null;
    }

    /** Primitive to fetch RSS feed/entry categories
     *
     * @param bool $grouped Whether to group categories by their "domain" attribute. When true the result is a map of domain to list of categories; when false the result is a flat list
     * @param bool $humanFriendly Unused; present for signature parity with the other category primitives
     * @return array|null The categories, or null if there are none
     */
    protected function getCategoriesRss2(bool $grouped = false, bool $humanFriendly = true) {
        if ($grouped) {
            $out = [];
            foreach ($this->fetchElements("./category") as $node) {
                $domain = $node->getAttribute("domain");
                $cat = $this->trimText($node->textContent);
                // the group must be created explicitly: reading a missing key raises a notice/warning
                if (!isset($out[$domain])) {
                    $out[$domain] = [];
                }
                // strict comparison keeps numeric-looking strings like "01" and "1" distinct
                if (!in_array($cat, $out[$domain], true)) {
                    $out[$domain][] = $cat;
                }
            }
            return $out ? $out : null;
        }
        $out = $this->fetchTextMulti("./category");
        // array_unique() is used rather than flipping the array because array keys
        // are cast: a category such as "2018" would otherwise come back as an integer
        return $out ? array_values(array_unique($out)) : null;
    }

    /** Primitive to fetch Dublin Core feed/entry categories
     *
     * Dublin Core doesn't have an obvious category type, so we use 'subject' as a nearest approximation
     *
     * @param bool $grouped Whether to return a grouped result. Dublin Core subjects have no grouping, so all categories are placed in a single group keyed by the empty string
     * @param bool $humanFriendly Unused; present for signature parity with the other category primitives
     * @return array|null The categories, or null if there are none
     */
    protected function getCategoriesDC(bool $grouped = false, bool $humanFriendly = true) {
        $out = $this->fetchTextMulti("./dc:subject");
        if (!$out) {
            return null;
        }
        // de-duplicate by value while keeping every category a string
        $out = array_values(array_unique($out));
        return $grouped ? ['' => $out] : $out;
    }

    /** Primitive to fetch Apple podcast/episode categories
     *
     * Category names are read from the "text" attribute of each category
     * element; elements whose name is empty after trimming are skipped.
     *
     * @param bool $grouped Whether to return a grouped result. All categories are placed in a single group keyed by the empty string
     * @param bool $humanFriendly Unused; present for signature parity with the other category primitives
     * @return array|null The categories, or null if there are none
     */
    protected function getCategoriesApple(bool $grouped = false, bool $humanFriendly = true) {
        $out = [];
        foreach ($this->fetchElements("./apple:category") as $node) {
            $cat = $this->trimText($node->getAttribute("text"));
            if (strlen($cat) && !in_array($cat, $out, true)) {
                $out[] = $cat;
            }
        }
        // report "nothing found" as null, like the other category primitives
        if (!$out) {
            return null;
        }
        return $grouped ? ['' => $out] : $out;
    }
}