diff --git a/302EndlessLoop@i=0 b/302EndlessLoop@i=0
new file mode 100644
index 0000000..7d52af1
--- /dev/null
+++ b/302EndlessLoop@i=0
@@ -0,0 +1,4 @@
+
+Warning: Unknown: failed to open stream: No such file or directory in Unknown on line 0
+
+Fatal error: Unknown: Failed opening required '%base%\server.php' (include_path='.;C:\php\pear') in Unknown on line 0
diff --git a/lib/AbstractException.php b/lib/AbstractException.php
index b8fa079..83d2d3d 100644
--- a/lib/AbstractException.php
+++ b/lib/AbstractException.php
@@ -64,7 +64,7 @@ abstract class AbstractException extends \Exception {
"Feed/Exception.timeout" => 10505,
"Feed/Exception.forbidden" => 10506,
"Feed/Exception.unauthorized" => 10507,
- "Feed/Exception.malformed" => 10511,
+ "Feed/Exception.malformedXml" => 10511,
"Feed/Exception.xmlEntity" => 10512,
"Feed/Exception.subscriptionNotFound" => 10521,
"Feed/Exception.unsupportedFeedFormat" => 10522,
diff --git a/lib/Conf.php b/lib/Conf.php
index 6a8115c..e3d2f15 100644
--- a/lib/Conf.php
+++ b/lib/Conf.php
@@ -28,10 +28,12 @@ class Conf {
public $userComposeNames = true;
public $userTempPasswordLength = 20;
- public $userAgentString;
+ public $fetchTimeout = 10;
+ public $fetchSizeLimit = 2 * 1024 * 1024;
+ public $fetchUserAgentString;
public function __construct(string $import_file = "") {
- $this->userAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
+ $this->fetchUserAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
diff --git a/lib/Database.php b/lib/Database.php
index 16fe21a..aa6b8c7 100644
--- a/lib/Database.php
+++ b/lib/Database.php
@@ -196,7 +196,7 @@ class Database {
$value = $in;
break;
}
- return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes();
+ return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes(); // FIXME: this will not work on PostgreSQL
}
public function settingRemove(string $key): bool {
diff --git a/lib/Feed.php b/lib/Feed.php
index 8c1e9a9..a81b2f3 100644
--- a/lib/Feed.php
+++ b/lib/Feed.php
@@ -44,8 +44,11 @@ class Feed {
public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool {
try {
$config = new Config;
- $config->setClientUserAgent(Data::$conf->userAgentString);
- $config->setGrabberUserAgent(Data::$conf->userAgentString);
+ $config->setMaxBodySize(Data::$conf->fetchSizeLimit);
+ $config->setClientTimeout(Data::$conf->fetchTimeout);
+ $config->setGrabberTimeout(Data::$conf->fetchTimeout);
+ $config->setClientUserAgent(Data::$conf->fetchUserAgentString);
+ $config->setGrabberUserAgent(Data::$conf->fetchUserAgentString);
$this->reader = new Reader($config);
$this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
@@ -102,9 +105,10 @@ class Feed {
$f->titleContentHash = hash('sha256', $f->title.$content);
}
- // If there is an id element then continue. The id is used already.
- $id = (string)$f->xml->id;
+ // If there is an Atom id element, use the SHA-256 hash of its value as the item id.
+ $id = (string)$f->xml->children('http://www.w3.org/2005/Atom')->id;
if ($id !== '') {
+ $f->id = hash('sha256', $id);
continue;
}
diff --git a/locale/en.php b/locale/en.php
index aa6e1b2..bc0829e 100644
--- a/locale/en.php
+++ b/locale/en.php
@@ -93,8 +93,8 @@ return [
'Exception.JKingWeb/Arsse/Feed/Exception.timeout' => 'Could not download feed "{url}" because its server timed out',
'Exception.JKingWeb/Arsse/Feed/Exception.forbidden' => 'Could not download feed "{url}" because you do not have permission to access it',
'Exception.JKingWeb/Arsse/Feed/Exception.unauthorized' => 'Could not download feed "{url}" because you provided insufficient or invalid credentials',
- 'Exception.JKingWeb/Arsse/Feed/Exception.malformed' => 'Could not parse feed "{url}" because it is malformed',
+ 'Exception.JKingWeb/Arsse/Feed/Exception.malformedXml' => 'Could not parse feed "{url}" because it is malformed',
'Exception.JKingWeb/Arsse/Feed/Exception.xmlEntity' => 'Refused to parse feed "{url}" because it contains an XXE attack',
'Exception.JKingWeb/Arsse/Feed/Exception.subscriptionNotFound' => 'Unable to find a feed at location "{url}"',
- 'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format'
+ 'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format',
];
\ No newline at end of file
diff --git a/tests/Feed/TestFeed.php b/tests/Feed/TestFeed.php
index 6fe949b..fabcce5 100644
--- a/tests/Feed/TestFeed.php
+++ b/tests/Feed/TestFeed.php
@@ -13,12 +13,112 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
function setUp() {
if(!@file_get_contents(self::$host."IsUp")) {
$this->markTestSkipped("Test Web server is not accepting requests");
+ } else if(!extension_loaded('curl')) {
+ $this->markTestSkipped("Feed tests are only accurate with curl enabled.");
}
$this->base = self::$host."Feed/";
$this->clearData();
Data::$conf = new Conf();
}
+ function testHandle400() {
+ $this->assertException("unsupportedFeedFormat", "Feed");
+ new Feed(null, $this->base."Fetching/Error?code=400");
+ }
+
+ function testHandle401() {
+ $this->assertException("unauthorized", "Feed");
+ new Feed(null, $this->base."Fetching/Error?code=401");
+ }
+
+ function testHandle403() {
+ $this->assertException("forbidden", "Feed");
+ new Feed(null, $this->base."Fetching/Error?code=403");
+ }
+
+ function testHandle404() {
+ $this->assertException("invalidUrl", "Feed");
+ new Feed(null, $this->base."Fetching/Error?code=404");
+ }
+
+ function testHandle500() {
+ $this->assertException("unsupportedFeedFormat", "Feed");
+ new Feed(null, $this->base."Fetching/Error?code=500");
+ }
+
+ function testHandleARedirectLoop() {
+ $this->assertException("maxRedirect", "Feed");
+ new Feed(null, $this->base."Fetching/EndlessLoop?i=0");
+ }
+
+ function testHandleATimeout() {
+ Data::$conf->fetchTimeout = 1;
+ $this->assertException("timeout", "Feed");
+ new Feed(null, $this->base."Fetching/Timeout");
+ }
+
+ function testHandleAnOverlyLargeFeed() {
+ Data::$conf->fetchSizeLimit = 512;
+ $this->assertException("maxSize", "Feed");
+ new Feed(null, $this->base."Fetching/TooLarge");
+ }
+
+ function testHandleACertificateError() {
+ $this->assertException("invalidCertificate", "Feed");
+ new Feed(null, "https://localhost:8000/");
+ }
+
+ function testParseAFeed() {
+ // test that various properties are set on the feed and on items
+ $f = new Feed(null, $this->base."Parsing/Valid");
+ $this->assertTrue(isset($f->lastModified));
+ $this->assertTrue(isset($f->nextFetch));
+ // check ID preference cascade
+ $h0 = "0a4f0e3768c8a5e9d8d9a16545ae4ff5b097f6dac3ad49555a94a7cace68ba73"; // hash of Atom ID
+ $h1 = "a135beced0236b723d12f845ff20ec22d4fc3afe1130012618f027170d57cb4e"; // hash of RSS2 GUID
+ $h2 = "205e986f4f8b3acfa281227beadb14f5e8c32c8dae4737f888c94c0df49c56f8"; // hash of Dublin Core identifier
+ $this->assertSame($h0, $f->data->items[0]->id);
+ $this->assertSame($h1, $f->data->items[1]->id);
+ $this->assertSame($h2, $f->data->items[2]->id);
+ // check the fallback hashes of items 3-5 (which have no id); some are expected to be empty strings
+ $h3 = "6287ba30f534e404e68356237e809683e311285d8b9f47d046ac58784eece052"; // URL hash
+ $h4 = "6cbb5d2dcb11610a99eb3f633dc246690c0acf33327bf7534f95542caa8f27c4"; // title hash
+ $h5 = "2b7c57ffa9adde92ccd1884fa1153a5bcd3211e48d99e27be5414cb078e6891c"; // content/enclosure hash
+ $this->assertNotEquals("", $f->data->items[3]->urlTitleHash);
+ $this->assertSame($h3, $f->data->items[3]->urlContentHash);
+ $this->assertSame("", $f->data->items[3]->titleContentHash);
+ $this->assertNotEquals("", $f->data->items[4]->urlTitleHash);
+ $this->assertSame("", $f->data->items[4]->urlContentHash);
+ $this->assertSame($h4, $f->data->items[4]->titleContentHash);
+ $this->assertSame("", $f->data->items[5]->urlTitleHash);
+ $this->assertNotEquals("", $f->data->items[5]->urlContentHash);
+ $this->assertNotEquals("", $f->data->items[5]->titleContentHash);
+ // check that items 3-5 have an empty-string id
+ $this->assertSame("", $f->data->items[3]->id);
+ $this->assertSame("", $f->data->items[4]->id);
+ $this->assertSame("", $f->data->items[5]->id);
+ }
+
+ function testParseEntityExpansionAttack() {
+ $this->assertException("xmlEntity", "Feed");
+ new Feed(null, $this->base."Parsing/XEEAttack");
+ }
+
+ function testParseExternalEntityAttack() {
+ $this->assertException("xmlEntity", "Feed");
+ new Feed(null, $this->base."Parsing/XXEAttack");
+ }
+
+ function testParseAnUnsupportedFeed() {
+ $this->assertException("unsupportedFeedFormat", "Feed");
+ new Feed(null, $this->base."Parsing/Unsupported");
+ }
+
+ function testParseAMalformedFeed() {
+ $this->assertException("malformedXml", "Feed");
+ new Feed(null, $this->base."Parsing/Malformed");
+ }
+
function testDeduplicateFeedItems() {
// duplicates with dates lead to the newest match being kept
$t = strtotime("2002-05-19T15:21:36Z");
diff --git a/tests/docroot/Feed/Fetching/EndlessLoop.php b/tests/docroot/Feed/Fetching/EndlessLoop.php
new file mode 100644
index 0000000..4b41a13
--- /dev/null
+++ b/tests/docroot/Feed/Fetching/EndlessLoop.php
@@ -0,0 +1,7 @@
+ 302,
+ 'cache' => false,
+ 'fields' => [
+ 'Location: http://localhost:'.$_SERVER['SERVER_PORT'].$_SERVER['REQUEST_URI']."0",
+ ]
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Fetching/Error.php b/tests/docroot/Feed/Fetching/Error.php
new file mode 100644
index 0000000..339b57d
--- /dev/null
+++ b/tests/docroot/Feed/Fetching/Error.php
@@ -0,0 +1,4 @@
+ (int) $_GET['code'],
+ 'cache' => false,
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Fetching/Timeout.php b/tests/docroot/Feed/Fetching/Timeout.php
new file mode 100644
index 0000000..cb0869a
--- /dev/null
+++ b/tests/docroot/Feed/Fetching/Timeout.php
@@ -0,0 +1,6 @@
+ 404,
+ 'cache' => false,
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Fetching/TooLarge.php b/tests/docroot/Feed/Fetching/TooLarge.php
new file mode 100644
index 0000000..3784c44
--- /dev/null
+++ b/tests/docroot/Feed/Fetching/TooLarge.php
@@ -0,0 +1,18 @@
+
+ '.str_repeat("0", 1024).'
+ ';
+return [
+ 'mime' => "application/rss+xml",
+ 'content' => <<
+
+ Test feed
+ http://example.com/
+ Example newsfeed title
+$item
+
+
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Parsing/Malformed.php b/tests/docroot/Feed/Parsing/Malformed.php
new file mode 100644
index 0000000..f889295
--- /dev/null
+++ b/tests/docroot/Feed/Parsing/Malformed.php
@@ -0,0 +1,6 @@
+ "application/rss+xml",
+ 'content' => <<
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Parsing/Unsupported.php b/tests/docroot/Feed/Parsing/Unsupported.php
new file mode 100644
index 0000000..647f18e
--- /dev/null
+++ b/tests/docroot/Feed/Parsing/Unsupported.php
@@ -0,0 +1,6 @@
+ "application/xml",
+ 'content' => <<
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Parsing/Valid.php b/tests/docroot/Feed/Parsing/Valid.php
new file mode 100644
index 0000000..e4c179b
--- /dev/null
+++ b/tests/docroot/Feed/Parsing/Valid.php
@@ -0,0 +1,35 @@
+ "application/rss+xml",
+ 'content' => <<
+
+ Test feed
+ http://example.com/
+ Example newsfeed title
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+ urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+
+ -
+ http://example.com/2
+
+ -
+ Example title
+
+ -
+ Example content
+
+
+
+
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Parsing/XEEAttack.php b/tests/docroot/Feed/Parsing/XEEAttack.php
new file mode 100644
index 0000000..522f35a
--- /dev/null
+++ b/tests/docroot/Feed/Parsing/XEEAttack.php
@@ -0,0 +1,47 @@
+ "application/rss+xml",
+ 'content' => <<
+
+
+
+
+
+
+
+
+
+]>
+
+
+ Test feed
+ http://example.com/
+ Example newsfeed title
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+ urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+
+ -
+ http://example.com/2
+
+ -
+ Example title
+
+ -
+ Example content
+
+
+
+
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/docroot/Feed/Parsing/XXEAttack.php b/tests/docroot/Feed/Parsing/XXEAttack.php
new file mode 100644
index 0000000..9e21a43
--- /dev/null
+++ b/tests/docroot/Feed/Parsing/XXEAttack.php
@@ -0,0 +1,38 @@
+ "application/rss+xml",
+ 'content' => <<
+]>
+
+
+ Test feed
+ http://example.com/
+ &xxe;
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+ urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+ http://example.com/1
+
+ -
+ urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2
+
+ -
+ http://example.com/2
+
+ -
+ Example title
+
+ -
+ Example content
+
+
+
+
+MESSAGE_BODY
+];
\ No newline at end of file
diff --git a/tests/server.php b/tests/server.php
index 9f4b062..bc77314 100644
--- a/tests/server.php
+++ b/tests/server.php
@@ -25,6 +25,7 @@ which include the following data:
*/
+ignore_user_abort(false);
$defaults = [ // default values for response
'code' => 200,
'content' => "",
diff --git a/tests/test b/tests/test
index 5695b58..c684a5b 100755
--- a/tests/test
+++ b/tests/test
@@ -1,6 +1,6 @@
#! /bin/sh
base=`dirname "$0"`
-php -n -S localhost:8000 "$base/server.php" >/dev/null &
+php -n -S localhost:8000 "$base/server.php" >/dev/null 2>/dev/null &
sleep 1s
php "$base/../vendor/phpunit/phpunit/phpunit" -c "$base/phpunit.xml" $*
sleep 1s
diff --git a/tests/test.bat b/tests/test.bat
index 7e7a18a..f0667b0 100644
--- a/tests/test.bat
+++ b/tests/test.bat
@@ -1,7 +1,7 @@
@echo off
setlocal
set base=%~dp0
-start /b php -n -S localhost:8000 "%base%\server.php" >nul
+start /b php -n -S localhost:8000 "%base%\server.php" >nul 2>nul
timeout /nobreak /t 1 >nul
php "%base%\..\vendor\phpunit\phpunit\phpunit" -c "%base%\phpunit.xml" %*
timeout /nobreak /t 1 >nul