diff --git a/lib/Parser/XML/Construct.php b/lib/Parser/XML/Construct.php index 7c15569..def8980 100644 --- a/lib/Parser/XML/Construct.php +++ b/lib/Parser/XML/Construct.php @@ -209,4 +209,22 @@ abstract class Construct { protected function getIdDC(): ?string { return $this->fetchString("dc:identifier", ".+"); } + + protected function getLangXML(): ?string { + // walk up the tree looking for the nearest language tag + $el = $this->subject; + do { + $out = $this->fetchString("@xml:lang", ".+", false, $el); + $el = $el->parentNode; + } while (is_null($out) && $el); + return $out; + } + + protected function getLangDC(): ?string { + return $this->fetchString("dc:language", ".+"); + } + + protected function getLangRss2(): ?string { + return $this->fetchString("language", ".+"); + } } diff --git a/lib/Parser/XML/Feed.php b/lib/Parser/XML/Feed.php index 44c52bf..fadb914 100644 --- a/lib/Parser/XML/Feed.php +++ b/lib/Parser/XML/Feed.php @@ -83,7 +83,7 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { $feed->meta->url = $this->url; $feed->sched = $this->getSchedule(); $feed->id = $this->getId(); - //$feed->lang = $this->getLang(); + $feed->lang = $this->getLang(); //$feed->url = $this->getUrl(); //$feed->link = $this->getLink(); //$feed->title = $this->getTitle(); @@ -115,6 +115,10 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { return $sched; } + public function getLang(): ?string { + return $this->getLangXML() ?? $this->getLangDC() ?? $this->getLangRss2(); + } + public function getUrl(): ?Url { return $this->getUrlAtom() ?? $this->getUrlRss1() ?? $this->getUrlPod(); } @@ -152,10 +156,6 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { return $this->getEntriesAtom() ?? $this->getEntriesRss1() ?? $this->getEntriesRss2() ?? []; } - public function getLang(): ?string { - return null; - } - public function getIcon(): ?Url { return null; } diff --git a/tests/cases/XML/feed-atom.yaml b/tests/cases/XML/feed-atom.yaml index 798d218..c567822 100644 --- a/tests/cases/XML/feed-atom.yaml +++ b/tests/cases/XML/feed-atom.yaml @@ -39,3 +39,18 @@ Bogus ID before good: format: atom version: '1.0' id: 'http://example.com/' + +Feed language: + input: > + + output: + format: atom + version: '1.0' + lang: en + +Bogus feed language: + input: > + + output: + format: atom + version: '1.0' diff --git a/tests/cases/XML/feed-rss0.yaml b/tests/cases/XML/feed-rss0.yaml index ae8200b..187786e 100644 --- a/tests/cases/XML/feed-rss0.yaml +++ b/tests/cases/XML/feed-rss0.yaml @@ -24,3 +24,13 @@ Minimal feed without channel 2: output: format: rdf version: '0.90' + +Feed language: # demonstrate walking up the DOM + input: > + + + + output: + format: rdf + version: '0.90' + lang: fr diff --git a/tests/cases/XML/feed-rss1.yaml b/tests/cases/XML/feed-rss1.yaml index 1f094e1..41fc53f 100644 --- a/tests/cases/XML/feed-rss1.yaml +++ b/tests/cases/XML/feed-rss1.yaml @@ -150,3 +150,16 @@ Syndication schedule base 1: sched: interval: PT24H base: '2020-03-01T20:21:12-04:00' + +DC language: + input: > + + + + de + + + output: + format: rdf + version: '1.0' + lang: de diff --git a/tests/cases/XML/feed-rss2.yaml b/tests/cases/XML/feed-rss2.yaml index 7de1e98..2d43271 100644 --- a/tests/cases/XML/feed-rss2.yaml +++ b/tests/cases/XML/feed-rss2.yaml @@ -177,7 +177,6 @@ Skip all hours: expired: true skip: 16777215 # 0b111111111111111111111111 - Skip all days and hours: input: > @@ -222,3 +221,12 @@ Skip all days and hours: sched: expired: true skip: 2147483647 # 0b1111111111111111111111111111111 + +Feed language: + input: > + + ja + + output: + format: rss + lang: ja