data = $data; $this->contentType = $contentType; if (strlen($url ?? "")) { $this->url = $url; } } /** Performs initialization of the instance */ protected function init(FeedStruct $feed): FeedStruct { $this->document = new \DOMDocument(); if (!$this->document->loadXML($this->data, self::LIBXML_OPTIONS)) { throw new Exception("notXML"); } $this->document->documentURI = $this->url; $this->xpath = new XPath($this->document); $this->subject = $this->document->documentElement; $ns = $this->subject->namespaceURI; $name = $this->subject->localName; if (is_null($ns) && $name === "rss") { $this->subject = $this->fetchElement("channel") ?? $this->subject; $feed->format = "rss"; $feed->version = $this->document->documentElement->hasAttribute("version") ? $this->document->documentElement->getAttribute("version") : null; } elseif ($ns === XPath::NS['rdf'] && $name === "RDF") { $feed->format = "rdf"; $channel = $this->fetchElement("rss1:channel|rss0:channel"); if ($channel) { $this->subject = $channel; $feed->version = ($channel->namespaceURI === XPath::NS['rss1']) ? "1.0" : "0.90"; } else { $element = $this->fetchElement("rss1:item|rss0:item|rss1:image|rss0:image"); if ($element) { $feed->version = ($element->namespaceURI === XPath::NS['rss1']) ? "1.0" : "0.90"; } else { throw new Exception("notXMLFeed"); } } } elseif ($ns === XPath::NS['atom'] && $name === "feed") { $feed->format = "atom"; $feed->version = "1.0"; } else { throw new Exception("notXMLFeed"); } $feed->meta->url = $this->url; return $feed; } /** Parses the feed to extract data */ public function parse(FeedStruct $feed = null): FeedStruct { $feed = $this->init($feed ?? new FeedStruct); $feed->meta->url = strlen($this->url ?? "") ? new Url($this->url) : null; $feed->sched = $this->getSchedule(); $feed->id = $this->getId(); $feed->lang = $this->getLang(); $feed->url = $this->getUrl(); $feed->link = $this->getLink(); $feed->title = $this->getTitle(); //$feed->summary = $this->getSummary(); //$feed->dateModified = $this->getDateModified(); //$feed->icon = $this->getIcon(); //$feed->image = $this->getImage(); //$feed->people = $this->getPeople(); //$feed->categories = $this->getCategories(); //$feed->entries = $this->getEntries($feed); return $feed; } public function getId(): ?string { return $this->getIdAtom() ?? $this->getIdDC() ?? $this->getIdRss2(); } public function getSchedule(): Schedule { $sched = new Schedule; $sched->interval = $this->getSchedIntervalRss1() ?? $this->getSchedIntervalRss2(); $sched->skip = $this->getSchedSkipRss2(); $sched->expired = $this->getExpiredPod(); if (is_null($sched->expired) && (($sched->skip & Schedule::DAY_ALL) == Schedule::DAY_ALL || ($sched->skip & Schedule::HOUR_ALL) == Schedule::HOUR_ALL)) { $sched->expired = true; } if ($sched->interval) { $sched->base = $this->getSchedBaseRss1(); } return $sched; } public function getLang(): ?string { return $this->getLangXML() ?? $this->getLangDC() ?? $this->getLangRss2(); } public function getUrl(): ?Url { return $this->getUrlAtom() ?? $this->getUrlRss1() ?? $this->getUrlPod(); } public function getLink(): ?Url { return $this->getLinkAtom() ?? $this->getLinkRss1() ?? $this->getLinkRss2(); } public function getTitle(): ?Text { return $this->getTitleAtom() ?? $this->getTitleRss1() ?? $this->getTitleRss2() ?? $this->getTitleDC() ?? $this->getTitlePod(); } public function getSummary(): ?Text { // unlike most other data, Atom is not preferred, because Atom doesn't really have feed summaries return $this->getSummaryDC() ?? $this->getSummaryRss1() ?? $this->getSummaryRss2() ?? $this->getSummaryPod() ?? $this->getSummaryAtom(); } public function getCategories(): CategoryCollection { return $this->getCategoriesAtom() ?? $this->getCategoriesRss2() ?? $this->getCategoriesDC() ?? $this->getCategoriesPod() ?? new CategoryCollection; } public function getPeople(): PersonCollection { $authors = $this->getAuthorsAtom() ?? $this->getAuthorsDC() ?? $this->getAuthorsPod() ?? $this->getAuthorsRss2() ?? new PersonCollection; $contributors = $this->getContributorsAtom() ?? $this->getContributorsDC() ?? new PersonCollection; $editors = $this->getEditorsRss2() ?? new PersonCollection; $webmasters = $this->getWebmastersPod() ?? $this->getWebmastersRss2() ?? new PersonCollection; return $authors->merge($contributors, $editors, $webmasters); } public function getDateModified(): ?Date { return $this->getDateModifiedAtom() ?? $this->getDateModifiedDC() ?? $this->getDateModifiedRss2(); } public function getEntries(FeedStruct $feed = null): array { return $this->getEntriesAtom() ?? $this->getEntriesRss1() ?? $this->getEntriesRss2() ?? []; } public function getIcon(): ?Url { return null; } public function getImage(): ?Url { return null; } /** Fetches the "complete" flag from an iTunes podcast */ protected function getExpiredPod(): ?bool { return $this->fetchString("apple:complete", "(?-i:Yes)") ? true : null; // case-sensitive pattern } protected function getSchedIntervalRss2(): ?\DateInterval { $ttl = (int) $this->fetchString("ttl", "\d+"); if ($ttl) { return new \DateInterval("PT{$ttl}M"); } return null; } protected function getSchedIntervalRss1(): ?\DateInterval { $period = $this->fetchString("sched:updatePeriod", "(?:year|month|week|dai|hour)ly"); if ($period) { [$p, $n] = [ "hourly" => ["TM", 60], // 60 minutes "daily" => ["TH", 24], // 24 hors "weekly" => ["D", 7], // 7 days "monthly" => ["D", 30], // 30 days "yearly" => ["M", 12], // 12 months ][strtolower($period)]; $f = max(1, (int) $this->fetchString("sched:updateFrequency", "0*[1-9]\d*")); // a frequency of zero makes no sense // divide the period by the frequency // FIXME: we must have an integer result because PHP (incorrectly) rejects fractional intervals // see https://bugs.php.net/bug.php?id=53831 $n = max(1, intdiv($n, $f)); // a frequency of zero still makes no sense, so we assume at least one subdivision return new \DateInterval("P".(strlen($p) === 1 ? "" : $p[0]).$n.$p[-1]); } return null; } protected function getSchedBaseRss1(): ?Date { return $this->fetchDate("sched:updateBase"); } /** Computes the "skip-schedule" of an RSS feed, the set of days and hours during which a feed should not be fetched */ protected function getSchedSkipRss2(): ?int { $out = 0; foreach($this->fetchString("skipHours/hour", "\d+", true) ?? [] as $h) { $out |= [ Schedule::HOUR_0, Schedule::HOUR_1, Schedule::HOUR_2, Schedule::HOUR_3, Schedule::HOUR_4, Schedule::HOUR_5, Schedule::HOUR_6, Schedule::HOUR_7, Schedule::HOUR_8, Schedule::HOUR_9, Schedule::HOUR_10, Schedule::HOUR_11, Schedule::HOUR_12, Schedule::HOUR_13, Schedule::HOUR_14, Schedule::HOUR_15, Schedule::HOUR_16, Schedule::HOUR_17, Schedule::HOUR_18, Schedule::HOUR_19, Schedule::HOUR_20, Schedule::HOUR_21, Schedule::HOUR_22, Schedule::HOUR_23, Schedule::HOUR_0, ][(int) $h] ?? 0; } foreach($this->fetchString("skipDays/day", null, true) ?? [] as $d) { $out |= [ "monday" => Schedule::DAY_MON, "tuesday" => Schedule::DAY_TUE, "wednesday" => Schedule::DAY_WED, "thursday" => Schedule::DAY_THU, "friday" => Schedule::DAY_FRI, "saturday" => Schedule::DAY_SAT, "sunday" => Schedule::DAY_SUN, "mon" => Schedule::DAY_MON, "tue" => Schedule::DAY_TUE, "wed" => Schedule::DAY_WED, "thu" => Schedule::DAY_THU, "fri" => Schedule::DAY_FRI, "sat" => Schedule::DAY_SAT, "sun" => Schedule::DAY_SUN, ][strtolower($d)] ?? 0; } return $out ?: null; } protected function getUrlAtom(): ?Url { return $this->fetchAtomRelation("self"); } protected function getUrlRss1(): ?Url { return $this->fetchUrl("(self::rss1:channel|self::rss0:channel)/@rdf:about"); } protected function getUrlPod(): ?Url { return $this->fetchUrl("apple:new-feed-url"); } }