From 1669c562edc0a6234d7a1869f64a6e286184600a Mon Sep 17 00:00:00 2001 From: "J. King" Date: Sun, 25 Feb 2018 22:53:02 -0500 Subject: [PATCH] Add feed modification dates --- lib/Construct.php | 58 ++++++++++++++++++++++++++++++++ lib/Feed.php | 5 +++ lib/JSON/Construct.php | 6 ++++ lib/JSON/Feed.php | 8 +++++ lib/XML/Construct.php | 6 ++++ lib/XML/Feed.php | 5 +++ lib/XML/Primitives/Construct.php | 10 ++++++ lib/XML/Primitives/Entry.php | 10 ++++++ lib/XML/Primitives/Feed.php | 7 +++- 9 files changed, 114 insertions(+), 1 deletion(-) diff --git a/lib/Construct.php b/lib/Construct.php index 2a7782d..690905f 100644 --- a/lib/Construct.php +++ b/lib/Construct.php @@ -10,6 +10,48 @@ use JKingWeb\Lax\Person\Person; trait Construct { + protected $dateFormats = [ + "D, d M Y H:i:s O", + "D, d M Y H:i:sO", + "D, d M Y H:i:s P", + "D, d M Y H:i:sP", + "D, d M Y H:i:s T", + "D, d M Y H:i:s", + "D, d M y H:i:s O", + "D, d M y H:i:sO", + "D, d M y H:i:s P", + "D, d M y H:i:sP", + "D, d M y H:i:s T", + "D, d M y H:i:s", + "Y-m-d\TH:i:s O", + "Y-m-d\TH:i:sO", + "Y-m-d\TH:i:s P", + "Y-m-d\TH:i:sP", + "Y-m-d\TH:i:s T", + "Y-m-d\TH:i:s", + "Y-m-d H:i:s O", + "Y-m-d H:i:sO", + "Y-m-d H:i:s P", + "Y-m-d H:i:sP", + "Y-m-d H:i:s T", + "Y-m-d H:i:s\Z", + "Y-m-d H:i:s", + "Y-m-d\TH:i:s.u O", + "Y-m-d\TH:i:s.uO", + "Y-m-d\TH:i:s.u P", + "Y-m-d\TH:i:s.uP", + "Y-m-d\TH:i:s.u T", + "Y-m-d\TH:i:s.u\Z", + "Y-m-d\TH:i:s.u", + "Y-m-d H:i:s.u O", + "Y-m-d H:i:s.uO", + "Y-m-d H:i:s.u P", + "Y-m-d H:i:s.uP", + "Y-m-d H:i:s.u T", + "Y-m-d H:i:s.u\Z", + "Y-m-d H:i:s.u", + ]; + /** Trims plain text and collapses whitespace */ protected function trimText(string $text): string { return trim(preg_replace("<\s{2,}>s", " ", $text)); @@ -90,4 +132,20 @@ trait Construct { $flags = defined("\FILTER_FLAG_EMAIL_UNICODE") ? 
\FILTER_FLAG_EMAIL_UNICODE : 0; return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, $flags); } + + protected function parseDate(string $date) { + $out = null; + $date = $this->trimText($date); + if (!strlen($date)) { + return $out; + } + $tz = new \DateTimeZone("UTC"); + foreach ($this->dateFormats as $format) { + $out = \DateTimeImmutable::createFromFormat($format, $date, $tz); + if ($out) { + break; + } + } + return $out ?: null; + } } diff --git a/lib/Feed.php b/lib/Feed.php index 1b2b2a6..27fcb61 100644 --- a/lib/Feed.php +++ b/lib/Feed.php @@ -21,6 +21,7 @@ abstract class Feed { public $categories; public $people; public $author; + public $dateModified; /** Constructs a parsed feed */ abstract public function __construct(string $data, string $contentType = "", string $url = ""); @@ -34,6 +35,7 @@ abstract class Feed { $this->summary = $this->getSummary(); $this->people = $this->getPeople(); $this->author = $this->people->primary(); + $this->dateModified = $this->getDateModified(); // do a second pass on missing data we'd rather fill in $this->link = strlen($this->link) ? $this->link : $this->url; $this->title = strlen($this->title) ? $this->title : $this->link; @@ -64,4 +66,7 @@ abstract class Feed { /** General function to fetch a collection of people associated with a feed */ abstract public function getPeople(): PersonCollection; + + /** General function to fetch the feed's modification date */ + abstract public function getDateModified(); } diff --git a/lib/JSON/Construct.php b/lib/JSON/Construct.php index f28f551..2fb91df 100644 --- a/lib/JSON/Construct.php +++ b/lib/JSON/Construct.php @@ -46,8 +46,14 @@ trait Construct { } } + /** Returns an object member as a resolved URL */ protected function fetchUrl(string $key, \stdClass $obj = null) { $url = $this->fetchMember($key, "str", $obj); return (!is_null($url)) ? 
$this->resolveUrl($url, $this->url) : $url; } + + /** Returns an object member as a parsed date */ + protected function fetchDate(string $key, \stdClass $obj = null) { + return $this->parseDate($this->fetchMember($key, "str", $obj) ?? ""); + } } diff --git a/lib/JSON/Feed.php b/lib/JSON/Feed.php index 5d11bde..da4faec 100644 --- a/lib/JSON/Feed.php +++ b/lib/JSON/Feed.php @@ -82,4 +82,12 @@ class Feed extends \JKingWeb\Lax\Feed { return $out; } } + + /** General function to fetch the modification date of a feed + * + * JSON feeds themselves don't have dates, so this always returns null + */ + public function getDateModified() { + return null; + } } diff --git a/lib/XML/Construct.php b/lib/XML/Construct.php index 048dc6c..88456d8 100644 --- a/lib/XML/Construct.php +++ b/lib/XML/Construct.php @@ -68,6 +68,11 @@ trait Construct { } } + /** Retrieves and parses a date from the content of a DOM element based on an XPath query */ + protected function fetchDate(string $query, \DOMNode $context = null) { + return $this->parseDate($this->fetchText($query, $context) ?? ""); + } + /** Returns a node-list of Atom link elements with the desired relation or equivalents. * * Links without an href attribute are excluded. @@ -102,6 +107,7 @@ trait Construct { return $this->resolveURL($url, $base); } + /** Populates a Person object according to the children of an Atom element */ protected function parsePersonAtom(\DOMNode $node) { $p = new Person; $p->mail = $this->fetchText("./atom:email", $node) ?? ""; diff --git a/lib/XML/Feed.php b/lib/XML/Feed.php index fcc8a08..4dc3df5 100644 --- a/lib/XML/Feed.php +++ b/lib/XML/Feed.php @@ -117,4 +117,9 @@ class Feed extends \JKingWeb\Lax\Feed { return $out->merge(($this->getPeopleRss2() ?? new PersonCollection)->filterOutRole("author")); } } + + /** General function to fetch the modification date of a feed */ + public function getDateModified() { + return $this->getDateModifiedAtom() ?? $this->getDateModifiedDC() ?? 
$this->getDateModifiedRss2(); + } } diff --git a/lib/XML/Primitives/Construct.php b/lib/XML/Primitives/Construct.php index 54fcadf..a565d23 100644 --- a/lib/XML/Primitives/Construct.php +++ b/lib/XML/Primitives/Construct.php @@ -189,4 +189,14 @@ trait Construct { $node = $this->fetchAtomRelations("self"); return $node->length ? $this->resolveNodeUrl($node->item(0), "href") : null; } + + /** Primitive to fetch the modification date of an Atom feed/entry */ + protected function getDateModifiedAtom() { + return $this->fetchDate("./atom:updated"); + } + + /** Primitive to fetch the modification date of a feed/entry from its dc:date element */ + protected function getDateModifiedDC() { + return $this->fetchDate("./dc:date"); + } } diff --git a/lib/XML/Primitives/Entry.php b/lib/XML/Primitives/Entry.php index a2c22ff..7628b36 100644 --- a/lib/XML/Primitives/Entry.php +++ b/lib/XML/Primitives/Entry.php @@ -67,4 +67,14 @@ trait Entry { return null; } } + + /** Primitive to fetch the modification date of an RSS entry */ + protected function getDateModifiedRss2() { + return $this->fetchDate("./pubDate"); + } + + /** Primitive to fetch the creation date of an Atom entry */ + protected function getDateCreatedAtom() { + return $this->fetchDate("./atom:published"); + } } diff --git a/lib/XML/Primitives/Feed.php b/lib/XML/Primitives/Feed.php index da67a41..1664122 100644 --- a/lib/XML/Primitives/Feed.php +++ b/lib/XML/Primitives/Feed.php @@ -93,7 +93,7 @@ trait Feed { return null; } - /** Primitive to fetch an podcast's canonical URL */ + /** Primitive to fetch a podcast's canonical URL */ protected function getUrlPod() { $node = $this->fetchElement("./apple:new-feed-url"); if ($node) { @@ -102,4 +102,9 @@ trait Feed { return null; } } + + /** Primitive to fetch the modification date of an RSS feed */ + protected function getDateModifiedRss2() { + return $this->fetchDate("./lastBuildDate") ?? $this->fetchDate("./pubDate"); + } }