From e7b2f541839270183e267a2bd280995ee080d4e8 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Tue, 19 Jan 2021 23:17:03 -0500 Subject: [PATCH] Prototype feed creation --- .../030_Supported_Protocols/005_Miniflux.md | 5 +- lib/Database.php | 15 +++-- lib/REST/Miniflux/V1.php | 66 ++++++++++++++++--- locale/en.php | 4 +- tests/cases/Database/SeriesSubscription.php | 6 +- tests/cases/REST/Miniflux/TestV1.php | 22 ++++--- 6 files changed, 90 insertions(+), 28 deletions(-) diff --git a/docs/en/030_Supported_Protocols/005_Miniflux.md b/docs/en/030_Supported_Protocols/005_Miniflux.md index a85a61e..c3a1921 100644 --- a/docs/en/030_Supported_Protocols/005_Miniflux.md +++ b/docs/en/030_Supported_Protocols/005_Miniflux.md @@ -25,16 +25,17 @@ Miniflux version 2.0.27 is emulated, though not all features are implemented - Custom User-Agent strings - The `disabled`, `ignore_http_cache`, and `fetch_via_proxy` flags - Changing the URL, username, or password of a feed +- Titles and types are not available during feed discovery and are filled with generic data # Differences -- Various error messages differ due to significant implementation differences +- Various error codes and messages differ due to significant implementation differences - `PUT` requests which return a body respond with `200 OK` rather than `201 Created` -- Only the URL should be considered reliable in feed discovery results - The "All" category is treated specially (see below for details) - Category names consisting only of whitespace are rejected along with the empty string - Filtering rules may not function identically (see below for details) - The `checked_at` field of feeds indicates when the feed was last updated rather than when it was last checked +- Creating a feed with the `scrape` property set to `true` might not return scraped content for the initial synchronization # Behaviour of filtering (block and keep) rules diff --git a/lib/Database.php b/lib/Database.php index 3866dda..de829db 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -745,10 +745,11 @@ class Database { * @param string $fetchUser The user name required to access the newsfeed, if applicable * @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext * @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document + * @param boolean $scrape Whether the initial synchronization should scrape full-article content */ - public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int { + public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int { // get the ID of the underlying feed, or add it if it's not yet in the database - $feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover); + $feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover, $scrape); // Add the feed to the user's subscriptions and return the new subscription's ID. return $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId(); } @@ -1089,8 +1090,9 @@ class Database { * @param string $fetchUser The user name required to access the newsfeed, if applicable * @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext * @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document + * @param boolean $scrape Whether the initial synchronization should scrape full-article content */ - public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int { + public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int { // normalize the input URL $url = URL::normalize($url); // check to see if the feed already exists @@ -1106,7 +1108,7 @@ class Database { $feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId(); try { // perform an initial update on the newly added feed - $this->feedUpdate($feedID, true); + $this->feedUpdate($feedID, true, $scrape); } catch (\Throwable $e) { // if the update fails, delete the feed we just added $this->db->prepare('DELETE from arsse_feeds where id = ?', 'int')->run($feedID); @@ -1126,8 +1128,9 @@ class Database { * * @param integer $feedID The numerical identifier of the newsfeed to refresh * @param boolean $throwError Whether to throw an exception on failure in addition to storing error information in the database + * @param boolean|null $scrapeOverride If not null, overrides information in the database signaling whether or not to scrape full-article content. This is intended for when there are no subscriptions for the feed in the database yet */ - public function feedUpdate($feedID, bool $throwError = false): bool { + public function feedUpdate($feedID, bool $throwError = false, ?bool $scrapeOverride = null): bool { // check to make sure the feed exists if (!V::id($feedID)) { throw new Db\ExceptionInput("typeViolation", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID, 'type' => "int > 0"]); @@ -1144,7 +1147,7 @@ class Database { throw new Db\ExceptionInput("subjectMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]); } // determine whether the feed's items should be scraped for full content from the source Web site - $scrape = (Arsse::$conf->fetchEnableScraping && $f['scrapers']); + $scrape = (Arsse::$conf->fetchEnableScraping && ($scrapeOverride ?? $f['scrapers'])); // the Feed object throws an exception when there are problems, but that isn't ideal // here. When an exception is thrown it should update the database with the // error instead of failing; if other exceptions are thrown, we should simply roll back diff --git a/lib/REST/Miniflux/V1.php b/lib/REST/Miniflux/V1.php index 43d9e36..7d481da 100644 --- a/lib/REST/Miniflux/V1.php +++ b/lib/REST/Miniflux/V1.php @@ -14,8 +14,10 @@ use JKingWeb\Arsse\Context\Context; use JKingWeb\Arsse\Db\ExceptionInput; use JKingWeb\Arsse\Misc\HTTP; use JKingWeb\Arsse\Misc\Date; +use JKingWeb\Arsse\Misc\URL; use JKingWeb\Arsse\Misc\ValueInfo as V; use JKingWeb\Arsse\REST\Exception; +use JKingWeb\Arsse\Rule\Rule; use JKingWeb\Arsse\User\ExceptionConflict; use JKingWeb\Arsse\User\Exception as UserException; use Psr\Http\Message\ServerRequestInterface; @@ -31,12 +33,25 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler { protected const ACCEPTED_TYPES_JSON = ["application/json"]; protected const TOKEN_LENGTH = 32; protected const VALID_JSON = [ - // user properties which map directly to Arsse user metadata are listed separately - 'url' => "string", - 'username' => "string", - 'password' => "string", - 'user_agent' => "string", - 'title' => "string", + // user properties which map directly to Arsse user metadata are listed separately; + // not all these properties are used by our implementation, but they are treated + // with the same strictness as in Miniflux to ease cross-compatibility + 'url' => "string", + 'username' => "string", + 'password' => "string", + 'user_agent' => "string", + 'title' => "string", + 'feed_url' => "string", + 'category_id' => "integer", + 'crawler' => "boolean", + 'user_agent' => "string", + 'scraper_rules' => "string", + 'rewrite_rules' => "string", + 'keeplist_rules' => "string", + 'blocklist_rules' => "string", + 'disabled' => "boolean", + 'ignore_http_cache' => "boolean", + 'fetch_via_proxy' => "boolean", ]; protected const USER_META_MAP = [ // Miniflux ID // Arsse ID Default value Extra @@ -81,7 +96,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler { ], '/feeds' => [ 'GET' => ["getFeeds", false, false, false, false, []], - 'POST' => ["createFeed", false, false, true, false, []], + 'POST' => ["createFeed", false, false, true, false, ["feed_url", "category_id"]], ], '/feeds/1' => [ 'GET' => ["getFeed", false, true, false, false, []], @@ -263,6 +278,10 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler { $body[$k] = null; } elseif (gettype($body[$k]) !== $t) { return new ErrorResponse(["InvalidInputType", 'field' => $k, 'expected' => $t, 'actual' => gettype($body[$k])], 422); + } elseif (in_array($k, ["keeplist_rules", "blocklist_rules"]) && !Rule::validate($body[$k])) { + return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422); + } elseif (in_array($k, ["url", "feed_url"]) && !URL::absolute($body[$k])) { + return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422); } } //normalize user-specific input @@ -377,7 +396,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler { 10506 => "Fetch403", 10507 => "Fetch401", ][$e->getCode()] ?? "FetchOther"; - return new ErrorResponse($msg, 500); + return new ErrorResponse($msg, 502); } $out = []; foreach ($list as $url) { @@ -646,6 +665,37 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler { return new Response($out); } + protected function createFeed(array $data): ResponseInterface { + $props = [ + 'keep_rule' => $data['keeplist_rules'], + 'block_rule' => $data['blocklist_rules'], + 'folder' => $data['category_id'] - 1, + 'scrape' => (bool) $data['crawler'], + ]; + try { + Arsse::$db->feedAdd($data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']); + $tr = Arsse::$db->begin(); + $id = Arsse::$db->subscriptionAdd(Arsse::$user->id, $data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']); + Arsse::$db->subscriptionPropertiesSet(Arsse::$user->id, $id, $props); + $tr->commit(); + } catch (FeedException $e) { + $msg = [ + 10502 => "Fetch404", + 10506 => "Fetch403", + 10507 => "Fetch401", + ][$e->getCode()] ?? "FetchOther"; + return new ErrorResponse($msg, 502); + } catch (ExceptionInput $e) { + switch ($e->getCode()) { + case 10235: + return new ErrorResponse("MissingCategory", 422); + case 10236: + return new ErrorResponse("DuplicateFeed", 409); + } + } + return new Response(['feed_id' => $id], 201); + } + public static function tokenGenerate(string $user, string $label): string { // Miniflux produces tokens in base64url alphabet $t = str_replace(["+", "/"], ["-", "_"], base64_encode(random_bytes(self::TOKEN_LENGTH))); diff --git a/locale/en.php b/locale/en.php index b809895..1f917c4 100644 --- a/locale/en.php +++ b/locale/en.php @@ -19,10 +19,12 @@ return [ 'API.Miniflux.Error.Fetch401' => 'You are not authorized to access this resource (invalid username/password)', 'API.Miniflux.Error.Fetch403' => 'Unable to fetch this resource (Status Code = 403)', 'API.Miniflux.Error.FetchOther' => 'Unable to fetch this resource', - 'API.Miniflux.Error.DuplicateCategory' => 'Category "{title}" already exists', + 'API.Miniflux.Error.DuplicateCategory' => 'This category already exists.', 'API.Miniflux.Error.InvalidCategory' => 'Invalid category title "{title}"', + 'API.Miniflux.Error.MissingCategory' => 'This category does not exist or does not belong to this user.', 'API.Miniflux.Error.InvalidElevation' => 'Only administrators can change permissions of standard users', 'API.Miniflux.Error.DuplicateUser' => 'The user name "{user}" already exists', + 'API.Miniflux.Error.DuplicateFeed' => 'This feed already exists.', 'API.TTRSS.Category.Uncategorized' => 'Uncategorized', 'API.TTRSS.Category.Special' => 'Special', diff --git a/tests/cases/Database/SeriesSubscription.php b/tests/cases/Database/SeriesSubscription.php index 389495d..3cdeeea 100644 --- a/tests/cases/Database/SeriesSubscription.php +++ b/tests/cases/Database/SeriesSubscription.php @@ -221,7 +221,7 @@ trait SeriesSubscription { $subID = $this->nextID("arsse_subscriptions"); \Phake::when(Arsse::$db)->feedUpdate->thenReturn(true); $this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false)); - \Phake::verify(Arsse::$db)->feedUpdate($feedID, true); + \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false); $state = $this->primeExpectations($this->data, [ 'arsse_feeds' => ['id','url','username','password'], 'arsse_subscriptions' => ['id','owner','feed'], @@ -238,7 +238,7 @@ trait SeriesSubscription { $subID = $this->nextID("arsse_subscriptions"); \Phake::when(Arsse::$db)->feedUpdate->thenReturn(true); $this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true)); - \Phake::verify(Arsse::$db)->feedUpdate($feedID, true); + \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false); $state = $this->primeExpectations($this->data, [ 'arsse_feeds' => ['id','url','username','password'], 'arsse_subscriptions' => ['id','owner','feed'], @@ -256,7 +256,7 @@ trait SeriesSubscription { try { Arsse::$db->subscriptionAdd($this->user, $url, "", "", false); } finally { - \Phake::verify(Arsse::$db)->feedUpdate($feedID, true); + \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false); $state = $this->primeExpectations($this->data, [ 'arsse_feeds' => ['id','url','username','password'], 'arsse_subscriptions' => ['id','owner','feed'], diff --git a/tests/cases/REST/Miniflux/TestV1.php b/tests/cases/REST/Miniflux/TestV1.php index 54e0343..fbcf82c 100644 --- a/tests/cases/REST/Miniflux/TestV1.php +++ b/tests/cases/REST/Miniflux/TestV1.php @@ -172,16 +172,22 @@ class TestV1 extends \JKingWeb\Arsse\Test\AbstractTest { $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => 2112])); } - public function testDiscoverFeeds(): void { - $exp = new Response([ + /** @dataProvider provideDiscoveries */ + public function testDiscoverFeeds($in, ResponseInterface $exp): void { + $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => $in])); + } + + public function provideDiscoveries(): iterable { + self::clearData(); + $discovered = [ ['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Feed"], ['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Missing"], - ]); - $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Valid"])); - $exp = new Response([]); - $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Invalid"])); - $exp = new ErrorResponse("Fetch404", 500); - $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Missing"])); + ]; + return [ + ["http://localhost:8000/Feed/Discovery/Valid", new Response($discovered)], + ["http://localhost:8000/Feed/Discovery/Invalid", new Response([])], + ["http://localhost:8000/Feed/Discovery/Missing", new ErrorResponse("Fetch404", 502)], + ]; } /** @dataProvider provideUserQueries */