From 3b018c89d128b5f06712cf41f8e389bf6dbc2ed8 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Wed, 2 Aug 2017 18:27:04 -0400 Subject: [PATCH] Implemented cleanup of orphaned feeds; fixes #25 --- lib/Conf.php | 6 +- lib/Database.php | 96 ++++++++++++++---------- lib/REST/NextCloudNews/V1_2.php | 5 +- lib/Service.php | 4 +- sql/SQLite3/0.sql | 1 + tests/REST/NextCloudNews/TestNCNV1_2.php | 11 +++ tests/lib/Database/SeriesFeed.php | 47 ++++++++++-- 7 files changed, 121 insertions(+), 49 deletions(-) diff --git a/lib/Conf.php b/lib/Conf.php index 4be11b8..caf195a 100644 --- a/lib/Conf.php +++ b/lib/Conf.php @@ -74,9 +74,13 @@ class Conf { public $fetchSizeLimit = 2 * 1024 * 1024; /** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */ public $fetchEnableScraping = true; - /** @var string User-Agent string to use when fetching feeds from foreign servers */ + /** @var string|null User-Agent string to use when fetching feeds from foreign servers */ public $fetchUserAgentString; + /** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours) + * @see https://en.wikipedia.org/wiki/ISO_8601#Durations */ + public $retainFeeds = "PT24H"; + /** Creates a new configuration object * @param string $import_file Optional file to read configuration data from * @see self::importFile() */ diff --git a/lib/Database.php b/lib/Database.php index 3a2bc08..0e3ad9b 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -147,7 +147,7 @@ class Database { if(!Arsse::$user->authorize("", __FUNCTION__)) { throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]); } - foreach($this->db->prepare("SELECT id from arsse_users")->run() as $user) { + foreach($this->db->query("SELECT id from arsse_users") as $user) { $out[] = $user['id']; } } @@ -501,7 +501,7 @@ class Database { } public function feedListStale(): array { - $feeds = $this->db->prepare("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->run()->getAll(); + $feeds = $this->db->query("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->getAll(); return array_column($feeds,'id'); } @@ -624,6 +624,24 @@ class Database { return true; } + public function feedCleanup(): bool { + $tr = $this->begin(); + // first unmark any feeds which are no longer orphaned + $this->db->query("UPDATE arsse_feeds set orphaned = null where exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)"); + // next mark any newly orphaned feeds with the current date and time + $this->db->query("UPDATE arsse_feeds set orphaned = CURRENT_TIMESTAMP where orphaned is null and not exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)"); + // finally delete feeds that have been orphaned longer than the retention period + $limit = Date::normalize("now"); + if(Arsse::$conf->retainFeeds) { + // if there is a retention period specified, compute it; otherwise feed are deleted immediatelty + $limit->sub(new \DateInterval(Arsse::$conf->retainFeeds)); + } + $out = (bool) $this->db->prepare("DELETE from arsse_feeds where orphaned <= ?", "datetime")->run($limit); + // commit changes and return + $tr->commit(); + return $out; + } + public function feedMatchLatest(int $feedID, int $count): Db\Result { return $this->db->prepare( "SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?", @@ -644,43 +662,6 @@ class Database { )->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC); } - public function articleStarredCount(string $user, array $context = []): int { - if(!Arsse::$user->authorize($user, __FUNCTION__)) { - throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); - } - return $this->db->prepare( - "WITH RECURSIVE - user(user) as (SELECT ?), - subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ". - "SELECT count(*) from arsse_marks - join user on user is owner - join arsse_articles on arsse_marks.article is arsse_articles.id - join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id - where starred is 1", - "str" - )->run($user)->getValue(); - } - - public function editionLatest(string $user, Context $context = null): int { - if(!Arsse::$user->authorize($user, __FUNCTION__)) { - throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); - } - if(!$context) { - $context = new Context; - } - $q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id"); - if($context->subscription()) { - // if a subscription is specified, make sure it exists - $id = $this->subscriptionValidateId($user, $context->subscription)['feed']; - // a simple WHERE clause is required here - $q->setWhere("arsse_feeds.id is ?", "int", $id); - } else { - $q->setCTE("user(user)", "SELECT ?", "str", $user); - $q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed"); - } - return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue(); - } - public function articleList(string $user, Context $context = null): Db\Result { if(!Arsse::$user->authorize($user, __FUNCTION__)) { throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); @@ -897,6 +878,23 @@ class Database { return (bool) $out; } + public function articleStarredCount(string $user, array $context = []): int { + if(!Arsse::$user->authorize($user, __FUNCTION__)) { + throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); + } + return $this->db->prepare( + "WITH RECURSIVE + user(user) as (SELECT ?), + subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ". + "SELECT count(*) from arsse_marks + join user on user is owner + join arsse_articles on arsse_marks.article is arsse_articles.id + join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id + where starred is 1", + "str" + )->run($user)->getValue(); + } + protected function articleValidateId(string $user, int $id): array { $out = $this->db->prepare( "SELECT @@ -934,4 +932,24 @@ class Database { } return $out; } + + public function editionLatest(string $user, Context $context = null): int { + if(!Arsse::$user->authorize($user, __FUNCTION__)) { + throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); + } + if(!$context) { + $context = new Context; + } + $q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id"); + if($context->subscription()) { + // if a subscription is specified, make sure it exists + $id = $this->subscriptionValidateId($user, $context->subscription)['feed']; + // a simple WHERE clause is required here + $q->setWhere("arsse_feeds.id is ?", "int", $id); + } else { + $q->setCTE("user(user)", "SELECT ?", "str", $user); + $q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed"); + } + return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue(); + } } \ No newline at end of file diff --git a/lib/REST/NextCloudNews/V1_2.php b/lib/REST/NextCloudNews/V1_2.php index 8990ad6..9f1c72b 100644 --- a/lib/REST/NextCloudNews/V1_2.php +++ b/lib/REST/NextCloudNews/V1_2.php @@ -3,6 +3,7 @@ declare(strict_types=1); namespace JKingWeb\Arsse\REST\NextCloudNews; use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\User; +use JKingWeb\Arsse\Service; use JKingWeb\Arsse\Misc\Context; use JKingWeb\Arsse\AbstractException; use JKingWeb\Arsse\Db\ExceptionInput; @@ -658,7 +659,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler { if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) { return new Response(403); } - // FIXME: stub + Service::cleanupPre(); return new Response(204); } @@ -684,7 +685,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler { 'version' => self::VERSION, 'arsse_version' => \JKingWeb\Arsse\VERSION, 'warnings' => [ - 'improperlyConfiguredCron' => !\JKingWeb\Arsse\Service::hasCheckedIn(), + 'improperlyConfiguredCron' => !Service::hasCheckedIn(), ] ]); } diff --git a/lib/Service.php b/lib/Service.php index 191bf14..e487657 100644 --- a/lib/Service.php +++ b/lib/Service.php @@ -82,8 +82,8 @@ class Service { } static function cleanupPre(): bool { - // TODO: stub - return true; + // mark unsubscribed feeds as orphaned and delete orphaned feeds that are beyond their retention period + return Arsse::$db->feedCleanup(); } static function cleanupPost():bool { diff --git a/sql/SQLite3/0.sql b/sql/SQLite3/0.sql index 4c69b93..5acf248 100644 --- a/sql/SQLite3/0.sql +++ b/sql/SQLite3/0.sql @@ -43,6 +43,7 @@ create table arsse_feeds( updated text, -- time at which the feed was last fetched modified text, -- time at which the feed last actually changed next_fetch text, -- time at which the feed should next be fetched + orphaned text, -- time at which the feed last had no subscriptions etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes err_count integer not null default 0, -- count of successive times update resulted in error since last successful update err_msg text, -- last error message diff --git a/tests/REST/NextCloudNews/TestNCNV1_2.php b/tests/REST/NextCloudNews/TestNCNV1_2.php index 753dbbf..bf9edb6 100644 --- a/tests/REST/NextCloudNews/TestNCNV1_2.php +++ b/tests/REST/NextCloudNews/TestNCNV1_2.php @@ -799,4 +799,15 @@ class TestNCNV1_2 extends Test\AbstractTest { $exp = new Response(200, $arr1); $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status"))); } + + function testCleanUpBeforeUpdate() { + Phake::when(Arsse::$db)->feedCleanup()->thenReturn(true); + $exp = new Response(204); + $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update"))); + Phake::verify(Arsse::$db)->feedCleanup(); + // performing a cleanup when not an admin fails + Phake::when(Arsse::$user)->rightsGet->thenReturn(0); + $exp = new Response(403); + $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update"))); + } } \ No newline at end of file diff --git a/tests/lib/Database/SeriesFeed.php b/tests/lib/Database/SeriesFeed.php index 7545d5c..ebe5795 100644 --- a/tests/lib/Database/SeriesFeed.php +++ b/tests/lib/Database/SeriesFeed.php @@ -33,6 +33,8 @@ trait SeriesFeed { $past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute")); $future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute")); $now = gmdate("Y-m-d H:i:s",strtotime("now")); + $yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day")); + $longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days")); $this->data = [ 'arsse_users' => [ 'columns' => [ @@ -55,13 +57,36 @@ trait SeriesFeed { 'err_msg' => "str", 'modified' => "datetime", 'next_fetch' => "datetime", + 'orphaned' => "datetime", ], 'rows' => [ - [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past], - [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future], - [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now], - [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past], - [5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future], + // feeds for update testing + [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null], + [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null], + [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null], + [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null], + [5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null], + // feeds for cleanup testing + [6,"http://example.com/1","",0,"",$now,$future,$longago], + [7,"http://example.com/2","",0,"",$now,$future,$yesterday], + [8,"http://example.com/3","",0,"",$now,$future,null], + [9,"http://example.com/4","",0,"",$now,$future,$past], + ] + ], + 'arsse_subscriptions' => [ + 'columns' => [ + 'owner' => "str", + 'feed' => "int", + ], + 'rows' => [ + // the first five feeds need at least one subscription so they are not involved in the cleanup test + ['john.doe@example.com',1], + ['john.doe@example.com',2], + ['john.doe@example.com',3], + ['john.doe@example.com',4], + ['john.doe@example.com',5], + // one feed previously marked for deletion has a subscription again, and so should not be deleted + ['jane.doe@example.com',6], ] ], 'arsse_articles' => [ @@ -235,4 +260,16 @@ trait SeriesFeed { Arsse::$db->feedUpdate(4); $this->assertSame([1], Arsse::$db->feedListStale()); } + + function testHandleOrphanedFeeds() { + Arsse::$db->feedCleanup(); + $now = gmdate("Y-m-d H:i:s"); + $state = $this->primeExpectations($this->data, [ + 'arsse_feeds' => ["id","orphaned"] + ]); + $state['arsse_feeds']['rows'][5][1] = null; + unset($state['arsse_feeds']['rows'][6]); + $state['arsse_feeds']['rows'][7][1] = $now; + $this->compareExpectations($state); + } } \ No newline at end of file