From ffc5579a7a874219cb57f09cc7ae75b2c95d0c44 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Sun, 3 Jan 2021 16:41:15 -0500 Subject: [PATCH] Partial implementation of filter rule handling --- lib/AbstractException.php | 1 + lib/Feed.php | 35 ++++++++++++++++++++++++----------- lib/Rule/Exception.php | 10 ++++++++++ lib/Rule/Rule.php | 31 +++++++++++++++++++++++++++++++ locale/en.php | 1 + tests/cases/Misc/TestRule.php | 22 ++++++++++++++++++++++ tests/phpunit.dist.xml | 1 + 7 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 lib/Rule/Exception.php create mode 100644 lib/Rule/Rule.php create mode 100644 tests/cases/Misc/TestRule.php diff --git a/lib/AbstractException.php b/lib/AbstractException.php index 73a1707..5a575c3 100644 --- a/lib/AbstractException.php +++ b/lib/AbstractException.php @@ -100,6 +100,7 @@ abstract class AbstractException extends \Exception { "ImportExport/Exception.invalidFolderName" => 10613, "ImportExport/Exception.invalidFolderCopy" => 10614, "ImportExport/Exception.invalidTagName" => 10615, + "Rule/Exception.invalidPattern" => 10701, ]; public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) { diff --git a/lib/Feed.php b/lib/Feed.php index 81256a6..da01d2f 100644 --- a/lib/Feed.php +++ b/lib/Feed.php @@ -79,10 +79,14 @@ class Feed { // we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged if (!sizeof($this->newItems) && !sizeof($this->changedItems)) { $this->modified = false; - } - // if requested, scrape full content for any new and changed items - if ($scrape) { - $this->scrape(); + } else { + if ($feedID) { + $this->computeFilterRules($feedID); + } + // if requested, scrape full content for any new and changed items + if ($scrape) { + $this->scrape(); + } } } // compute the time at which the feed should next be fetched @@ -119,7 +123,7 @@ class Feed { } } - protected function parse(): bool { + protected function parse(): void { try { $feed = $this->resource->reader->getParser( $this->resource->getUrl(), @@ -222,7 +226,6 @@ class Feed { sort($f->categories); } $this->data = $feed; - return true; } protected function deduplicateItems(array $items): array { @@ -269,13 +272,13 @@ class Feed { return $out; } - protected function matchToDatabase(int $feedID = null): bool { + protected function matchToDatabase(int $feedID = null): void { // first perform deduplication on items $items = $this->deduplicateItems($this->data->items); // if we haven't been given a database feed ID to check against, all items are new if (is_null($feedID)) { $this->newItems = $items; - return true; + return; } // get as many of the latest articles in the database as there are in the feed $articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll(); @@ -303,7 +306,6 @@ class Feed { // merge the two change-lists, preserving keys $this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed)); } - return true; } protected function matchItems(array $items, array $articles): array { @@ -438,7 +440,7 @@ class Feed { return $dates; } - protected function scrape(): bool { + protected function scrape(): void { $scraper = new Scraper(self::configure()); foreach (array_merge($this->newItems, $this->changedItems) as $item) { $scraper->setUrl($item->url); @@ -447,6 +449,17 @@ class Feed { $item->content = $scraper->getFilteredContent(); } } - return true; + } + + protected function computeFilterRules(int $feedID): void { + return; + $rules = Arsse::$db->feedRulesGet($feedID); + foreach ($rules as $r) { + $keep = ""; + $block = ""; + if (strlen($r['keep'])) { + + } + } } } diff --git a/lib/Rule/Exception.php b/lib/Rule/Exception.php new file mode 100644 index 0000000..e3c6664 --- /dev/null +++ b/lib/Rule/Exception.php @@ -0,0 +1,10 @@ +", $pattern, $m, \PREG_OFFSET_CAPTURE)) { + // where necessary escape our chosen delimiter (backtick) in reverse order + foreach (array_reverse($m[0]) as [,$pos]) { + // count the number of backslashes preceding the delimiter character + $count = 0; + $p = $pos; + while ($p-- && $pattern[$p] === "\\" && ++$count); + // if the number is even (including zero), add a backslash + if ($count % 2 === 0) { + $pattern = substr($pattern, 0, $pos)."\\".substr($pattern, $pos); + } + } + } + // add the delimiters and test the pattern + $pattern = "`$pattern`u"; + if (@preg_match($pattern, "") === false) { + throw new Exception("invalidPattern"); + } + return $pattern; + } +} \ No newline at end of file diff --git a/locale/en.php b/locale/en.php index d67547a..1927eaf 100644 --- a/locale/en.php +++ b/locale/en.php @@ -194,4 +194,5 @@ return [ 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name', 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent', 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name', + 'Exception.JKingWeb/Arsse/Rule/Exception.invalidPattern' => 'Specified rule pattern is invalid' ]; diff --git a/tests/cases/Misc/TestRule.php b/tests/cases/Misc/TestRule.php new file mode 100644 index 0000000..804b172 --- /dev/null +++ b/tests/cases/Misc/TestRule.php @@ -0,0 +1,22 @@ +assertSame($exp, Rule::prep("`..`..\\`..\\\\`..")); + } + + public function testPrepareAnInvalidPattern(): void { + $this->assertException("invalidPattern", "Rule"); + Rule::prep("["); + } +} \ No newline at end of file diff --git a/tests/phpunit.dist.xml b/tests/phpunit.dist.xml index 1848665..0875bf5 100644 --- a/tests/phpunit.dist.xml +++ b/tests/phpunit.dist.xml @@ -51,6 +51,7 @@ cases/Misc/TestContext.php cases/Misc/TestURL.php cases/Misc/TestHTTP.php + cases/Misc/TestRule.php cases/User/TestInternal.php