@ -5,6 +5,7 @@ namespace JKingWeb\Arsse;
use JKingWeb\Arsse\Misc\Date;
use JKingWeb\Arsse\Misc\Date;
use PicoFeed\PicoFeedException;
use PicoFeed\PicoFeedException;
use PicoFeed\Config\Config;
use PicoFeed\Config\Config;
use PicoFeed\Client\Client;
use PicoFeed\Reader\Reader;
use PicoFeed\Reader\Reader;
use PicoFeed\Reader\Favicon;
use PicoFeed\Reader\Favicon;
use PicoFeed\Scraper\Scraper;
use PicoFeed\Scraper\Scraper;
@ -12,31 +13,37 @@ use PicoFeed\Scraper\Scraper;
class Feed {
class Feed {
// NOTE(review): every declaration below appears twice except $parser and $reader, which appear
// once; this looks like residue of a diff rendering rather than intended source — TODO confirm.

// presumably the parsed feed data; where it is assigned is not visible in this section
public $data = null;
public $data = null;
// presumably the favicon located for the feed; where it is assigned is not visible in this section
public $favicon;
public $favicon;
// parser obtained from the reader's getParser() in parse()
public $parser;
// PicoFeed Reader created by the instance download() method
public $reader;
// client returned by the download; queried for its URL, content, encoding and Last-Modified value
public $resource;
public $resource;
// defaults to false; presumably flipped once the feed is found to have changed — verify
public $modified = false;
public $modified = false;
// presumably derived from the HTTP Last-Modified value read in the constructor — verify
public $lastModified;
public $lastModified;
// result of computeNextFetch()
public $nextFetch;
public $nextFetch;
// items which scrape() merges with $changedItems and passes to the scraper
public $newItems = [];
public $newItems = [];
// items which scrape() merges with $newItems and passes to the scraper
public $changedItems = [];
public $changedItems = [];
/**
 * Determines the feed URL reachable from a candidate URL.
 *
 * The resource at $url is downloaded; if its content is detected as a feed,
 * $url itself is returned. Otherwise the content is searched for subscription
 * links and the first link found is returned.
 *
 * @param string $url The candidate URL to probe
 * @param string $username User name passed through to the download
 * @param string $password Password passed through to the download
 * @return string $url if it is itself a feed, otherwise the first subscription link found
 * @throws Feed\Exception if no subscription link can be found, or as thrown by self::download()
 */
public static function discover(string $url, string $username = '', string $password = ''): string {
    // fetch the candidate feed; empty Last-Modified and ETag values are passed
    $f = self::download($url, "", "", $username, $password);
    try {
        if ($f->reader->detectFormat($f->getContent())) {
            // if the prospective URL is a feed, use it
            return $url;
        }
        $links = $f->reader->find($f->getUrl(), $f->getContent());
        if (!$links) {
            throw new Feed\Exception($url, new \PicoFeed\Reader\SubscriptionNotFoundException('Unable to find a subscription'));
        }
        return $links[0];
    } finally {
        // work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
        // performed in a finally block so that it also runs when detection or link-finding throws
        libxml_use_internal_errors(false);
    }
}
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false, bool $discover = false) {
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false) {
// set the configuration
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
Arsse::VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
php_uname('m') // platform architecture
);
$this->config = new Config;
$this->config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
$this->config->setClientTimeout(Arsse::$conf->fetchTimeout);
$this->config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
$this->config->setClientUserAgent($userAgent);
$this->config->setGrabberUserAgent($userAgent);
// fetch the feed
// fetch the feed
$this->download($url, $lastModified, $etag, $username, $password, $discover);
$this->resource = self::download($url, $lastModified, $etag, $username, $password);
// format the HTTP Last-Modified date returned
// format the HTTP Last-Modified date returned
$lastMod = $this->resource->getLastModified();
$lastMod = $this->resource->getLastModified();
if (strlen($lastMod)) {
if (strlen($lastMod)) {
@ -65,26 +72,40 @@ class Feed {
$this->nextFetch = $this->computeNextFetch();
$this->nextFetch = $this->computeNextFetch();
}
}
// NOTE(review): this span appears to interleave two revisions of the class: the instance method
// download(..., bool $discover): bool, and the static methods configure() and download() which
// seem to replace it. Lines using $this or $action appear to belong to the instance method, and
// lines which occur twice appear to be shared by both revisions — TODO confirm against the hunks.
//
// Instance download(): calls the Reader method named "discover" or "download" (chosen by
// $discover), stores the result in $this->resource, and returns true.
protected function download(string $url, string $lastModified, string $etag, string $username, string $password, bool $discover): bool {
// configure(): builds and returns a PicoFeed Config carrying the body-size limit, the timeouts
// and the user-agent string taken from Arsse::$conf.
protected static function configure(): Config {
$action = $discover ? "discover" : "download";
// use the configured user-agent string unless it is null, in which case a default is built
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://thearsse.com/) PicoFeed (https://github.com/miniflux/picoFeed)',
Arsse::VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
php_uname('m') // platform architecture
);
$config = new Config;
$config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
// the client and the grabber share the same timeout and user-agent settings
$config->setClientTimeout(Arsse::$conf->fetchTimeout);
$config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
$config->setClientUserAgent($userAgent);
$config->setGrabberUserAgent($userAgent);
return $config;
}
// Static download(): fetches $url using a Reader built from a fresh configure() result and
// returns the resulting Client; a PicoFeedException is rethrown wrapped in a Feed\Exception.
protected static function download(string $url, string $lastModified, string $etag, string $username, string $password): Client {
try {
try {
$this->reader = new Reader($this->config);
$reader = new Reader(self::configure());
$this->resource = $this->reader->$action($url, $lastModified, $etag, $username, $password);
$client = $reader->download($url, $lastModified, $etag, $username, $password);
// attach the Reader to the returned client so that callers can reach it as ->reader
$client->reader = $reader;
return $client;
} catch (PicoFeedException $e) {
} catch (PicoFeedException $e) {
throw new Feed\Exception($url, $e);
throw new Feed\Exception($url, $e);
}
}
return true;
}
}
protected function parse(): bool {
protected function parse(): bool {
try {
try {
$this->parser = $this->reader->getParser(
$feed = $ this->resource ->reader->getParser(
$this->resource->getUrl(),
$this->resource->getUrl(),
$this->resource->getContent(),
$this->resource->getContent(),
$this->resource->getEncoding()
$this->resource->getEncoding()
);
)->execute();
$feed = $this->parser->execute();
// Grab the favicon for the feed; returns an empty string if it cannot find one.
// Grab the favicon for the feed; returns an empty string if it cannot find one.
// Some feeds might use a different domain (eg: feedburner), so the site url is
// Some feeds might use a different domain (eg: feedburner), so the site url is
// used instead of the feed's url.
// used instead of the feed's url.
@ -388,7 +409,7 @@ class Feed {
}
}
protected function scrape(): bool {
protected function scrape(): bool {
$scraper = new Scraper($this->config );
$scraper = new Scraper(self::configure() );
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
$scraper->setUrl($item->url);
$scraper->setUrl($item->url);
$scraper->execute();
$scraper->execute();