diff --git a/302EndlessLoop@i=0 b/302EndlessLoop@i=0 new file mode 100644 index 0000000..7d52af1 --- /dev/null +++ b/302EndlessLoop@i=0 @@ -0,0 +1,4 @@ +
+Warning: Unknown: failed to open stream: No such file or directory in Unknown on line 0
+
+Fatal error: Unknown: Failed opening required '%base%\server.php' (include_path='.;C:\php\pear') in Unknown on line 0
diff --git a/lib/AbstractException.php b/lib/AbstractException.php index b8fa079..83d2d3d 100644 --- a/lib/AbstractException.php +++ b/lib/AbstractException.php @@ -64,7 +64,7 @@ abstract class AbstractException extends \Exception { "Feed/Exception.timeout" => 10505, "Feed/Exception.forbidden" => 10506, "Feed/Exception.unauthorized" => 10507, - "Feed/Exception.malformed" => 10511, + "Feed/Exception.malformedXml" => 10511, "Feed/Exception.xmlEntity" => 10512, "Feed/Exception.subscriptionNotFound" => 10521, "Feed/Exception.unsupportedFeedFormat" => 10522, diff --git a/lib/Conf.php b/lib/Conf.php index 6a8115c..e3d2f15 100644 --- a/lib/Conf.php +++ b/lib/Conf.php @@ -28,10 +28,12 @@ class Conf { public $userComposeNames = true; public $userTempPasswordLength = 20; - public $userAgentString; + public $fetchTimeout = 10; + public $fetchSizeLimit = 2 * 1024 * 1024; + public $fetchUserAgentString; public function __construct(string $import_file = "") { - $this->userAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)', + $this->fetchUserAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)', VERSION, // Arsse version php_uname('s'), // OS php_uname('r'), // OS version diff --git a/lib/Database.php b/lib/Database.php index 16fe21a..aa6b8c7 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -196,7 +196,7 @@ class Database { $value = $in; break; } - return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes(); + return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes(); // FIXME: this will not work on PostgreSQL } public function settingRemove(string $key): bool { diff --git a/lib/Feed.php b/lib/Feed.php index 8c1e9a9..a81b2f3 100644 --- a/lib/Feed.php +++ b/lib/Feed.php @@ -44,8 +44,11 @@ class Feed { public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool { try { $config = new Config; - $config->setClientUserAgent(Data::$conf->userAgentString); - $config->setGrabberUserAgent(Data::$conf->userAgentString); + $config->setMaxBodySize(Data::$conf->fetchSizeLimit); + $config->setClientTimeout(Data::$conf->fetchTimeout); + $config->setGrabberTimeout(Data::$conf->fetchTimeout); + $config->setClientUserAgent(Data::$conf->fetchUserAgentString); + $config->setGrabberUserAgent(Data::$conf->fetchUserAgentString); $this->reader = new Reader($config); $this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password); @@ -102,9 +105,10 @@ class Feed { $f->titleContentHash = hash('sha256', $f->title.$content); } - // If there is an id element then continue. The id is used already. - $id = (string)$f->xml->id; + // If there is an Atom id element use it as the id. + $id = (string)$f->xml->children('http://www.w3.org/2005/Atom')->id; if ($id !== '') { + $f->id = hash('sha256', $id); continue; } diff --git a/locale/en.php b/locale/en.php index aa6e1b2..bc0829e 100644 --- a/locale/en.php +++ b/locale/en.php @@ -93,8 +93,8 @@ return [ 'Exception.JKingWeb/Arsse/Feed/Exception.timeout' => 'Could not download feed "{url}" because its server timed out', 'Exception.JKingWeb/Arsse/Feed/Exception.forbidden' => 'Could not download feed "{url}" because you do not have permission to access it', 'Exception.JKingWeb/Arsse/Feed/Exception.unauthorized' => 'Could not download feed "{url}" because you provided insufficient or invalid credentials', - 'Exception.JKingWeb/Arsse/Feed/Exception.malformed' => 'Could not parse feed "{url}" because it is malformed', + 'Exception.JKingWeb/Arsse/Feed/Exception.malformedXml' => 'Could not parse feed "{url}" because it is malformed', 'Exception.JKingWeb/Arsse/Feed/Exception.xmlEntity' => 'Refused to parse feed "{url}" because it contains an XXE attack', 'Exception.JKingWeb/Arsse/Feed/Exception.subscriptionNotFound' => 'Unable to find a feed at location "{url}"', - 'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format' + 'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format', ]; \ No newline at end of file diff --git a/tests/Feed/TestFeed.php b/tests/Feed/TestFeed.php index 6fe949b..fabcce5 100644 --- a/tests/Feed/TestFeed.php +++ b/tests/Feed/TestFeed.php @@ -13,12 +13,112 @@ class TestFeed extends \PHPUnit\Framework\TestCase { function setUp() { if(!@file_get_contents(self::$host."IsUp")) { $this->markTestSkipped("Test Web server is not accepting requests"); + } else if(!extension_loaded('curl')) { + $this->markTestSkipped("Feed tests are only accurate with curl enabled."); } $this->base = self::$host."Feed/"; $this->clearData(); Data::$conf = new Conf(); } + function testHandle400() { + $this->assertException("unsupportedFeedFormat", "Feed"); + new Feed(null, $this->base."Fetching/Error?code=400"); + } + + function testHandle401() { + $this->assertException("unauthorized", "Feed"); + new Feed(null, $this->base."Fetching/Error?code=401"); + } + + function testHandle403() { + $this->assertException("forbidden", "Feed"); + new Feed(null, $this->base."Fetching/Error?code=403"); + } + + function testHandle404() { + $this->assertException("invalidUrl", "Feed"); + new Feed(null, $this->base."Fetching/Error?code=404"); + } + + function testHandle500() { + $this->assertException("unsupportedFeedFormat", "Feed"); + new Feed(null, $this->base."Fetching/Error?code=500"); + } + + function testHandleARedirectLoop() { + $this->assertException("maxRedirect", "Feed"); + new Feed(null, $this->base."Fetching/EndlessLoop?i=0"); + } + + function testHandleATimeout() { + Data::$conf->fetchTimeout = 1; + $this->assertException("timeout", "Feed"); + new Feed(null, $this->base."Fetching/Timeout"); + } + + function testHandleAnOverlyLargeFeed() { + Data::$conf->fetchSizeLimit = 512; + $this->assertException("maxSize", "Feed"); + new Feed(null, $this->base."Fetching/TooLarge"); + } + + function testHandleACertificateError() { + $this->assertException("invalidCertificate", "Feed"); + new Feed(null, "https://localhost:8000/"); + } + + function testParseAFeed() { + // test that various properties are set on the feed and on items + $f = new Feed(null, $this->base."Parsing/Valid"); + $this->assertTrue(isset($f->lastModified)); + $this->assertTrue(isset($f->nextFetch)); + // check ID preference cascade + $h0 = "0a4f0e3768c8a5e9d8d9a16545ae4ff5b097f6dac3ad49555a94a7cace68ba73"; // hash of Atom ID + $h1 = "a135beced0236b723d12f845ff20ec22d4fc3afe1130012618f027170d57cb4e"; // hash of RSS2 GUID + $h2 = "205e986f4f8b3acfa281227beadb14f5e8c32c8dae4737f888c94c0df49c56f8"; // hash of Dublin Core identifier + $this->assertSame($h0, $f->data->items[0]->id); + $this->assertSame($h1, $f->data->items[1]->id); + $this->assertSame($h2, $f->data->items[2]->id); + // check null hashes + $h3 = "6287ba30f534e404e68356237e809683e311285d8b9f47d046ac58784eece052"; // URL hash + $h4 = "6cbb5d2dcb11610a99eb3f633dc246690c0acf33327bf7534f95542caa8f27c4"; // title hash + $h5 = "2b7c57ffa9adde92ccd1884fa1153a5bcd3211e48d99e27be5414cb078e6891c"; // content/enclosure hash + $this->assertNotEquals("", $f->data->items[3]->urlTitleHash); + $this->assertSame($h3, $f->data->items[3]->urlContentHash); + $this->assertSame("", $f->data->items[3]->titleContentHash); + $this->assertNotEquals("", $f->data->items[4]->urlTitleHash); + $this->assertSame("", $f->data->items[4]->urlContentHash); + $this->assertSame($h4, $f->data->items[4]->titleContentHash); + $this->assertSame("", $f->data->items[5]->urlTitleHash); + $this->assertNotEquals("", $f->data->items[5]->urlContentHash); + $this->assertNotEquals("", $f->data->items[5]->titleContentHash); + // check null IDs + $this->assertSame("", $f->data->items[3]->id); + $this->assertSame("", $f->data->items[4]->id); + $this->assertSame("", $f->data->items[5]->id); + } + + function testParseEntityExpansionAttack() { + $this->assertException("xmlEntity", "Feed"); + new Feed(null, $this->base."Parsing/XEEAttack"); + } + + function testParseExternalEntityAttack() { + $this->assertException("xmlEntity", "Feed"); + new Feed(null, $this->base."Parsing/XXEAttack"); + } + + function testParseAnUnsupportedFeed() { + $this->assertException("unsupportedFeedFormat", "Feed"); + new Feed(null, $this->base."Parsing/Unsupported"); + } + + function testParseAMalformedFeed() { + $this->assertException("malformedXml", "Feed"); + new Feed(null, $this->base."Parsing/Malformed"); + } + function testDeduplicateFeedItems() { // duplicates with dates lead to the newest match being kept $t = strtotime("2002-05-19T15:21:36Z"); diff --git a/tests/docroot/Feed/Fetching/EndlessLoop.php b/tests/docroot/Feed/Fetching/EndlessLoop.php new file mode 100644 index 0000000..4b41a13 --- /dev/null +++ b/tests/docroot/Feed/Fetching/EndlessLoop.php @@ -0,0 +1,7 @@ + 302, + 'cache' => false, + 'fields' => [ + 'Location: http://localhost:'.$_SERVER['SERVER_PORT'].$_SERVER['REQUEST_URI']."0", + ] +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Fetching/Error.php b/tests/docroot/Feed/Fetching/Error.php new file mode 100644 index 0000000..339b57d --- /dev/null +++ b/tests/docroot/Feed/Fetching/Error.php @@ -0,0 +1,4 @@ + (int) $_GET['code'], + 'cache' => false, +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Fetching/Timeout.php b/tests/docroot/Feed/Fetching/Timeout.php new file mode 100644 index 0000000..cb0869a --- /dev/null +++ b/tests/docroot/Feed/Fetching/Timeout.php @@ -0,0 +1,6 @@ + 404, + 'cache' => false, +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Fetching/TooLarge.php b/tests/docroot/Feed/Fetching/TooLarge.php new file mode 100644 index 0000000..3784c44 --- /dev/null +++ b/tests/docroot/Feed/Fetching/TooLarge.php @@ -0,0 +1,18 @@ + + '.str_repeat("0", 1024).' + '; +return [ + 'mime' => "application/rss+xml", + 'content' => << + + Test feed + http://example.com/ + Example newsfeed title +$item + + +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Parsing/Malformed.php b/tests/docroot/Feed/Parsing/Malformed.php new file mode 100644 index 0000000..f889295 --- /dev/null +++ b/tests/docroot/Feed/Parsing/Malformed.php @@ -0,0 +1,6 @@ + "application/rss+xml", + 'content' => << +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Parsing/Unsupported.php b/tests/docroot/Feed/Parsing/Unsupported.php new file mode 100644 index 0000000..647f18e --- /dev/null +++ b/tests/docroot/Feed/Parsing/Unsupported.php @@ -0,0 +1,6 @@ + "application/xml", + 'content' => << +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Parsing/Valid.php b/tests/docroot/Feed/Parsing/Valid.php new file mode 100644 index 0000000..e4c179b --- /dev/null +++ b/tests/docroot/Feed/Parsing/Valid.php @@ -0,0 +1,35 @@ + "application/rss+xml", + 'content' => << + + Test feed + http://example.com/ + Example newsfeed title + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + + + http://example.com/2 + + + Example title + + + Example content + + + + +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Parsing/XEEAttack.php b/tests/docroot/Feed/Parsing/XEEAttack.php new file mode 100644 index 0000000..522f35a --- /dev/null +++ b/tests/docroot/Feed/Parsing/XEEAttack.php @@ -0,0 +1,47 @@ + "application/rss+xml", + 'content' => << + + + + + + + + + +]> + + + Test feed + http://example.com/ + Example newsfeed title + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + + + http://example.com/2 + + + Example title + + + Example content + + + + +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/docroot/Feed/Parsing/XXEAttack.php b/tests/docroot/Feed/Parsing/XXEAttack.php new file mode 100644 index 0000000..9e21a43 --- /dev/null +++ b/tests/docroot/Feed/Parsing/XXEAttack.php @@ -0,0 +1,38 @@ + "application/rss+xml", + 'content' => << +]> + + + Test feed + http://example.com/ + &xxe; + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + http://example.com/1 + + + urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2 + + + http://example.com/2 + + + Example title + + + Example content + + + + +MESSAGE_BODY +]; \ No newline at end of file diff --git a/tests/server.php b/tests/server.php index 9f4b062..bc77314 100644 --- a/tests/server.php +++ b/tests/server.php @@ -25,6 +25,7 @@ which include the following data: */ +ignore_user_abort(false); $defaults = [ // default values for response 'code' => 200, 'content' => "", diff --git a/tests/test b/tests/test index 5695b58..c684a5b 100755 --- a/tests/test +++ b/tests/test @@ -1,6 +1,6 @@ #! /bin/sh base=`dirname "$0"` -php -n -S localhost:8000 "$base/server.php" >/dev/null & +php -n -S localhost:8000 "$base/server.php" >/dev/null 2>/dev/null & sleep 1s php "$base/../vendor/phpunit/phpunit/phpunit" -c "$base/phpunit.xml" $* sleep 1s diff --git a/tests/test.bat b/tests/test.bat index 7e7a18a..f0667b0 100644 --- a/tests/test.bat +++ b/tests/test.bat @@ -1,7 +1,7 @@ @echo off setlocal set base=%~dp0 -start /b php -n -S localhost:8000 "%base%\server.php" >nul +start /b php -n -S localhost:8000 "%base%\server.php" >nul 2>nul timeout /nobreak /t 1 >nul php "%base%\..\vendor\phpunit\phpunit\phpunit" -c "%base%\phpunit.xml" %* timeout /nobreak /t 1 >nul