diff --git a/lib/Parser/XML/Construct.php b/lib/Parser/XML/Construct.php
index fe609fc..5c3a41d 100644
--- a/lib/Parser/XML/Construct.php
+++ b/lib/Parser/XML/Construct.php
@@ -26,6 +26,9 @@ abstract class Construct {
protected const DATE_EARLIEST = 2;
protected const DATE_ALL = 3;
+ protected const QUERY_AMBIGUOUS_DATES = "rss2:pubDate|rss2:lastBuildDate|dc:date|dc:available|dc:dateAccepted|dc:dateCopyrighted|dc:dateSubmitted|dc:issued|dc:modified|dc:valid|dct:date|dct:available|dct:dateAccepted|dct:dateCopyrighted|dct:dateSubmitted|dct:issued|dct:modified|dct:valid";
+ protected const QUERY_RSS_PERMALINK = "rss2:guid[not(@isPermaLink) or @isPermaLink='true']";
+
/** @var \DOMDocument */
protected $document;
/** @var \DOMXPath */
@@ -359,7 +362,7 @@ abstract class Construct {
}
protected function getLinkRss2(): ?Url {
- return $this->fetchUrl("rss2:link") ?? $this->fetchUrl("rss2:guid[not(@isPermalink) or @isPermalink='true']");
+ return $this->fetchUrl("rss2:link") ?? $this->fetchUrl(self::QUERY_RSS_PERMALINK);
}
protected function getLinkRss1(): ?Url {
diff --git a/lib/Parser/XML/Entry.php b/lib/Parser/XML/Entry.php
index 93867f5..13ce060 100644
--- a/lib/Parser/XML/Entry.php
+++ b/lib/Parser/XML/Entry.php
@@ -88,8 +88,8 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
well-defined semantics here. Thus the semantics of all the other
formats are equal, and we want the latest date, whatever it is.
*/
- return $this->fetchDate("atom:updated", self::DATE_LATEST) // Atom update date
- ?? $this->fetchDate("dc:date|rss2:pubDate|rss2:lastBuildDate", self::DATE_LATEST); // Latest other datee
+ return $this->fetchDate("atom:updated", self::DATE_LATEST) // Atom update date
+ ?? $this->fetchDate(self::QUERY_AMBIGUOUS_DATES, self::DATE_LATEST); // Latest other date
}
public function getDateCreated(): ?Date {
@@ -98,8 +98,9 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
formats are equal, and we want the earliest date, but only if
there are at least two
*/
- return $this->fetchDate("atom:created", self::DATE_EARLIEST) // Atom creation date
- ?? $this->getAssumedDateCreated(); // Earliest other date
+ return $this->fetchDate("atom:created", self::DATE_EARLIEST) // Atom creation date
+ ?? $this->fetchDate("dct:created|dc:created", self::DATE_LATEST) // Dublin Core creation date
+ ?? $this->getAssumedDateCreated(); // Earliest other date
}
public function getContent(): ?Text {
@@ -146,7 +147,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
*/
protected function getLinkAndRelatedRss2(): array {
$link = $this->fetchUrl("rss2:link");
- $guid = $this->fetchUrl("rss2:guid[not(@isPermalink) or @isPermalink='true']");
+ $guid = $this->fetchUrl(self::QUERY_RSS_PERMALINK);
if ($link && $guid) {
if ($link->getScheme() !== $guid->getScheme() || $link->getAuthority() !== $guid->getAuthority()) {
return [$guid, $link];
@@ -156,7 +157,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
}
protected function getAssumedDateCreated(): ?Date {
- $dates = $this->fetchDate("dc:date|rss2:pubDate|rss2:lastBuildDate", self::DATE_ALL);
+ $dates = $this->fetchDate(self::QUERY_AMBIGUOUS_DATES, self::DATE_ALL);
if (sizeof($dates) > 1) {
return $dates[0];
}
diff --git a/lib/Parser/XML/Feed.php b/lib/Parser/XML/Feed.php
index a0f460c..5f2cfeb 100644
--- a/lib/Parser/XML/Feed.php
+++ b/lib/Parser/XML/Feed.php
@@ -164,7 +164,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
formats are equal, and we want the latest date, whatever it is.
*/
return $this->fetchDate("atom:updated", self::DATE_LATEST)
- ?? $this->fetchDate("dc:date|rss2:pubDate|rss2:lastBuildDate", self::DATE_LATEST);
+ ?? $this->fetchDate(self::QUERY_AMBIGUOUS_DATES, self::DATE_LATEST);
}
public function getIcon(): ?Url {
diff --git a/lib/Parser/XML/XPath.php b/lib/Parser/XML/XPath.php
index 365c9c7..b096440 100644
--- a/lib/Parser/XML/XPath.php
+++ b/lib/Parser/XML/XPath.php
@@ -13,6 +13,7 @@ class XPath extends \DOMXpath {
'rss1' => "http://purl.org/rss/1.0/", // RDF site summary 1.0 http://purl.org/rss/1.0/spec
'rss0' => "http://channel.netscape.com/rdf/simple/0.9/", // RDF Site Summary 0.90 http://www.rssboard.org/rss-0-9-0
'dc' => "http://purl.org/dc/elements/1.1/", // Dublin Core metadata http://purl.org/rss/1.0/modules/dc/
+ 'dct' => "http://purl.org/dc/terms/", // Dublin Core terms https://web.archive.org/web/20071222055924/http://web.resource.org/rss/1.0/modules/dcterms/
'sched' => "http://purl.org/rss/1.0/modules/syndication/", // Syndication schedule extension http://purl.org/rss/1.0/modules/syndication/
'enc' => "http://purl.org/rss/1.0/modules/content/", // Explicitly encoded content extension http://purl.org/rss/1.0/modules/content/
'media' => "http://search.yahoo.com/mrss/", // Embedded media extension http://www.rssboard.org/media-rss
diff --git a/tests/cases/XML/entry-rss2.yaml b/tests/cases/XML/entry-rss2.yaml
index 90faa24..bf9bc8b 100644
--- a/tests/cases/XML/entry-rss2.yaml
+++ b/tests/cases/XML/entry-rss2.yaml
@@ -10,7 +10,7 @@ GUID:
input: >
-
- blah
+ blah
output:
@@ -22,7 +22,7 @@ Language:
input: >
-
- blah
+ blah
fr
@@ -36,7 +36,7 @@ Entry link:
input: >
-
- http://example.com/
+ http://example.com/
-
http://example.com/
@@ -58,7 +58,7 @@ Related link:
input: >
-
- http://example.com/
+ http://example.com/
http://example.net/
-
@@ -66,7 +66,7 @@ Related link:
http://example.net/
-
- http://example.com/
+ http://example.com/
http://example.net/
-
diff --git a/tests/cases/XML/feed-rss2.yaml b/tests/cases/XML/feed-rss2.yaml
index 67bfa71..a60365d 100644
--- a/tests/cases/XML/feed-rss2.yaml
+++ b/tests/cases/XML/feed-rss2.yaml
@@ -9,7 +9,7 @@ Channel GUID:
input: >
- http://example.com/
+ http://example.com/
output:
@@ -20,7 +20,7 @@ Channel GUID with whitespace:
input: >
-
+
http://example.com/
@@ -33,7 +33,7 @@ Root GUID: # Any elements on the RSS2 root element should be ignored
input: >
- http://example.com/
+ http://example.com/
output:
format: rss
@@ -43,7 +43,7 @@ Bogus GUID before good:
- http://example.com/
+ http://example.com/
output:
@@ -255,7 +255,7 @@ Feed link via GUID 1:
Feed link via GUID 2:
input: >
- http://example.com/
+ http://example.com/
output:
format: rss
@@ -265,7 +265,7 @@ Feed link via GUID 2:
GUID not a link:
input: >
- http://example.com/
+ http://example.com/
output:
format: rss