Implemented cleanup of orphaned feeds; fixes #25
This commit is contained in:
parent
0773e21034
commit
3b018c89d1
7 changed files with 121 additions and 49 deletions
|
@ -74,9 +74,13 @@ class Conf {
|
|||
public $fetchSizeLimit = 2 * 1024 * 1024;
|
||||
/** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */
|
||||
public $fetchEnableScraping = true;
|
||||
/** @var string User-Agent string to use when fetching feeds from foreign servers */
|
||||
/** @var string|null User-Agent string to use when fetching feeds from foreign servers */
|
||||
public $fetchUserAgentString;
|
||||
|
||||
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours)
|
||||
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
|
||||
public $retainFeeds = "PT24H";
|
||||
|
||||
/** Creates a new configuration object
|
||||
* @param string $import_file Optional file to read configuration data from
|
||||
* @see self::importFile() */
|
||||
|
|
|
@ -147,7 +147,7 @@ class Database {
|
|||
if(!Arsse::$user->authorize("", __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]);
|
||||
}
|
||||
foreach($this->db->prepare("SELECT id from arsse_users")->run() as $user) {
|
||||
foreach($this->db->query("SELECT id from arsse_users") as $user) {
|
||||
$out[] = $user['id'];
|
||||
}
|
||||
}
|
||||
|
@ -501,7 +501,7 @@ class Database {
|
|||
}
|
||||
|
||||
public function feedListStale(): array {
|
||||
$feeds = $this->db->prepare("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->run()->getAll();
|
||||
$feeds = $this->db->query("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->getAll();
|
||||
return array_column($feeds,'id');
|
||||
}
|
||||
|
||||
|
@ -624,6 +624,24 @@ class Database {
|
|||
return true;
|
||||
}
|
||||
|
||||
public function feedCleanup(): bool {
|
||||
$tr = $this->begin();
|
||||
// first unmark any feeds which are no longer orphaned
|
||||
$this->db->query("UPDATE arsse_feeds set orphaned = null where exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
|
||||
// next mark any newly orphaned feeds with the current date and time
|
||||
$this->db->query("UPDATE arsse_feeds set orphaned = CURRENT_TIMESTAMP where orphaned is null and not exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
|
||||
// finally delete feeds that have been orphaned longer than the retention period
|
||||
$limit = Date::normalize("now");
|
||||
if(Arsse::$conf->retainFeeds) {
|
||||
// if there is a retention period specified, compute it; otherwise feed are deleted immediatelty
|
||||
$limit->sub(new \DateInterval(Arsse::$conf->retainFeeds));
|
||||
}
|
||||
$out = (bool) $this->db->prepare("DELETE from arsse_feeds where orphaned <= ?", "datetime")->run($limit);
|
||||
// commit changes and return
|
||||
$tr->commit();
|
||||
return $out;
|
||||
}
|
||||
|
||||
public function feedMatchLatest(int $feedID, int $count): Db\Result {
|
||||
return $this->db->prepare(
|
||||
"SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?",
|
||||
|
@ -644,43 +662,6 @@ class Database {
|
|||
)->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
|
||||
}
|
||||
|
||||
public function articleStarredCount(string $user, array $context = []): int {
|
||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
}
|
||||
return $this->db->prepare(
|
||||
"WITH RECURSIVE
|
||||
user(user) as (SELECT ?),
|
||||
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
|
||||
"SELECT count(*) from arsse_marks
|
||||
join user on user is owner
|
||||
join arsse_articles on arsse_marks.article is arsse_articles.id
|
||||
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
|
||||
where starred is 1",
|
||||
"str"
|
||||
)->run($user)->getValue();
|
||||
}
|
||||
|
||||
public function editionLatest(string $user, Context $context = null): int {
|
||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
}
|
||||
if(!$context) {
|
||||
$context = new Context;
|
||||
}
|
||||
$q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
|
||||
if($context->subscription()) {
|
||||
// if a subscription is specified, make sure it exists
|
||||
$id = $this->subscriptionValidateId($user, $context->subscription)['feed'];
|
||||
// a simple WHERE clause is required here
|
||||
$q->setWhere("arsse_feeds.id is ?", "int", $id);
|
||||
} else {
|
||||
$q->setCTE("user(user)", "SELECT ?", "str", $user);
|
||||
$q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
|
||||
}
|
||||
return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue();
|
||||
}
|
||||
|
||||
public function articleList(string $user, Context $context = null): Db\Result {
|
||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
|
@ -897,6 +878,23 @@ class Database {
|
|||
return (bool) $out;
|
||||
}
|
||||
|
||||
public function articleStarredCount(string $user, array $context = []): int {
|
||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
}
|
||||
return $this->db->prepare(
|
||||
"WITH RECURSIVE
|
||||
user(user) as (SELECT ?),
|
||||
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
|
||||
"SELECT count(*) from arsse_marks
|
||||
join user on user is owner
|
||||
join arsse_articles on arsse_marks.article is arsse_articles.id
|
||||
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
|
||||
where starred is 1",
|
||||
"str"
|
||||
)->run($user)->getValue();
|
||||
}
|
||||
|
||||
protected function articleValidateId(string $user, int $id): array {
|
||||
$out = $this->db->prepare(
|
||||
"SELECT
|
||||
|
@ -934,4 +932,24 @@ class Database {
|
|||
}
|
||||
return $out;
|
||||
}
|
||||
|
||||
public function editionLatest(string $user, Context $context = null): int {
|
||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
}
|
||||
if(!$context) {
|
||||
$context = new Context;
|
||||
}
|
||||
$q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
|
||||
if($context->subscription()) {
|
||||
// if a subscription is specified, make sure it exists
|
||||
$id = $this->subscriptionValidateId($user, $context->subscription)['feed'];
|
||||
// a simple WHERE clause is required here
|
||||
$q->setWhere("arsse_feeds.id is ?", "int", $id);
|
||||
} else {
|
||||
$q->setCTE("user(user)", "SELECT ?", "str", $user);
|
||||
$q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
|
||||
}
|
||||
return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue();
|
||||
}
|
||||
}
|
|
@ -3,6 +3,7 @@ declare(strict_types=1);
|
|||
namespace JKingWeb\Arsse\REST\NextCloudNews;
|
||||
use JKingWeb\Arsse\Arsse;
|
||||
use JKingWeb\Arsse\User;
|
||||
use JKingWeb\Arsse\Service;
|
||||
use JKingWeb\Arsse\Misc\Context;
|
||||
use JKingWeb\Arsse\AbstractException;
|
||||
use JKingWeb\Arsse\Db\ExceptionInput;
|
||||
|
@ -658,7 +659,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
|
|||
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
|
||||
return new Response(403);
|
||||
}
|
||||
// FIXME: stub
|
||||
Service::cleanupPre();
|
||||
return new Response(204);
|
||||
}
|
||||
|
||||
|
@ -684,7 +685,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
|
|||
'version' => self::VERSION,
|
||||
'arsse_version' => \JKingWeb\Arsse\VERSION,
|
||||
'warnings' => [
|
||||
'improperlyConfiguredCron' => !\JKingWeb\Arsse\Service::hasCheckedIn(),
|
||||
'improperlyConfiguredCron' => !Service::hasCheckedIn(),
|
||||
]
|
||||
]);
|
||||
}
|
||||
|
|
|
@ -82,8 +82,8 @@ class Service {
|
|||
}
|
||||
|
||||
static function cleanupPre(): bool {
|
||||
// TODO: stub
|
||||
return true;
|
||||
// mark unsubscribed feeds as orphaned and delete orphaned feeds that are beyond their retention period
|
||||
return Arsse::$db->feedCleanup();
|
||||
}
|
||||
|
||||
static function cleanupPost():bool {
|
||||
|
|
|
@ -43,6 +43,7 @@ create table arsse_feeds(
|
|||
updated text, -- time at which the feed was last fetched
|
||||
modified text, -- time at which the feed last actually changed
|
||||
next_fetch text, -- time at which the feed should next be fetched
|
||||
orphaned text, -- time at which the feed last had no subscriptions
|
||||
etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes
|
||||
err_count integer not null default 0, -- count of successive times update resulted in error since last successful update
|
||||
err_msg text, -- last error message
|
||||
|
|
|
@ -799,4 +799,15 @@ class TestNCNV1_2 extends Test\AbstractTest {
|
|||
$exp = new Response(200, $arr1);
|
||||
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status")));
|
||||
}
|
||||
|
||||
function testCleanUpBeforeUpdate() {
|
||||
Phake::when(Arsse::$db)->feedCleanup()->thenReturn(true);
|
||||
$exp = new Response(204);
|
||||
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
|
||||
Phake::verify(Arsse::$db)->feedCleanup();
|
||||
// performing a cleanup when not an admin fails
|
||||
Phake::when(Arsse::$user)->rightsGet->thenReturn(0);
|
||||
$exp = new Response(403);
|
||||
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
|
||||
}
|
||||
}
|
|
@ -33,6 +33,8 @@ trait SeriesFeed {
|
|||
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
|
||||
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
|
||||
$now = gmdate("Y-m-d H:i:s",strtotime("now"));
|
||||
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
|
||||
$longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
|
||||
$this->data = [
|
||||
'arsse_users' => [
|
||||
'columns' => [
|
||||
|
@ -55,13 +57,36 @@ trait SeriesFeed {
|
|||
'err_msg' => "str",
|
||||
'modified' => "datetime",
|
||||
'next_fetch' => "datetime",
|
||||
'orphaned' => "datetime",
|
||||
],
|
||||
'rows' => [
|
||||
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past],
|
||||
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future],
|
||||
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now],
|
||||
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past],
|
||||
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future],
|
||||
// feeds for update testing
|
||||
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null],
|
||||
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null],
|
||||
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null],
|
||||
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null],
|
||||
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null],
|
||||
// feeds for cleanup testing
|
||||
[6,"http://example.com/1","",0,"",$now,$future,$longago],
|
||||
[7,"http://example.com/2","",0,"",$now,$future,$yesterday],
|
||||
[8,"http://example.com/3","",0,"",$now,$future,null],
|
||||
[9,"http://example.com/4","",0,"",$now,$future,$past],
|
||||
]
|
||||
],
|
||||
'arsse_subscriptions' => [
|
||||
'columns' => [
|
||||
'owner' => "str",
|
||||
'feed' => "int",
|
||||
],
|
||||
'rows' => [
|
||||
// the first five feeds need at least one subscription so they are not involved in the cleanup test
|
||||
['john.doe@example.com',1],
|
||||
['john.doe@example.com',2],
|
||||
['john.doe@example.com',3],
|
||||
['john.doe@example.com',4],
|
||||
['john.doe@example.com',5],
|
||||
// one feed previously marked for deletion has a subscription again, and so should not be deleted
|
||||
['jane.doe@example.com',6],
|
||||
]
|
||||
],
|
||||
'arsse_articles' => [
|
||||
|
@ -235,4 +260,16 @@ trait SeriesFeed {
|
|||
Arsse::$db->feedUpdate(4);
|
||||
$this->assertSame([1], Arsse::$db->feedListStale());
|
||||
}
|
||||
|
||||
function testHandleOrphanedFeeds() {
|
||||
Arsse::$db->feedCleanup();
|
||||
$now = gmdate("Y-m-d H:i:s");
|
||||
$state = $this->primeExpectations($this->data, [
|
||||
'arsse_feeds' => ["id","orphaned"]
|
||||
]);
|
||||
$state['arsse_feeds']['rows'][5][1] = null;
|
||||
unset($state['arsse_feeds']['rows'][6]);
|
||||
$state['arsse_feeds']['rows'][7][1] = $now;
|
||||
$this->compareExpectations($state);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue