Rule refactoring

- The Database class is now responsible for preparing rules
- Rules are now returned in an array keyed by user
- Empty strings are now passed through during rule preparation
This commit is contained in:
J. King 2021-01-08 14:17:46 -05:00
parent c1eff8479c
commit 4f34b4ff29
6 changed files with 57 additions and 43 deletions

View file

@ -13,6 +13,8 @@ use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\Misc\Date;
use JKingWeb\Arsse\Misc\ValueInfo as V;
use JKingWeb\Arsse\Misc\URL;
use JKingWeb\Arsse\Rule\Rule;
use JKingWeb\Arsse\Rule\Exception as RuleException;
/** The high-level interface with the database
*
@ -1205,14 +1207,26 @@ class Database {
/** Retrieves the set of filters users have applied to a given feed
*
* Each record includes the following keys:
*
* - "owner": The user for whom to apply the filters
* - "keep": The "keep" rule; any articles which fail to match this rule are hidden
* - "block": The block rule; any article which matches this rule are hidden
* The result is an associative array whose keys are usernames, values
* being an array in turn with the following keys:
*
* - "keep": The "keep" rule as a prepared pattern; any articles which fail to match this rule are hidden
* - "block": The block rule as a prepared pattern; any articles which match this rule are hidden
*/
public function feedRulesGet(int $feedID): Db\Result {
return $this->db->prepare("SELECT owner, coalesce(keep_rule, '') as keep, coalesce(block_rule, '') as block from arsse_subscriptions where feed = ? and (coalesce(keep_rule, '') || coalesce(block_rule, '')) <> '' order by owner", "int")->run($feedID);
public function feedRulesGet(int $feedID): array {
$out = [];
$result = $this->db->prepare("SELECT owner, coalesce(keep_rule, '') as keep, coalesce(block_rule, '') as block from arsse_subscriptions where feed = ? and (coalesce(keep_rule, '') || coalesce(block_rule, '')) <> '' order by owner", "int")->run($feedID);
foreach ($result as $row) {
try {
$keep = Rule::prep($row['keep']);
$block = Rule::prep($row['block']);
} catch (RuleException $e) {
// invalid rules should not normally appear in the database, but it's possible
continue;
}
$out[$row['owner']] = ['keep' => $keep, 'block' => $block];
}
return $out;
}
/** Retrieves various identifiers for the latest $count articles in the given newsfeed. The identifiers are:

View file

@ -455,7 +455,7 @@ class Feed {
protected function computeFilterRules(int $feedID): void {
$rules = Arsse::$db->feedRulesGet($feedID);
foreach ($rules as $r) {
foreach ($rules as $user => $r) {
$stats = ['new' => [], 'changed' => []];
foreach ($this->newItems as $index => $item) {
$stats['new'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories);
@ -463,7 +463,7 @@ class Feed {
foreach ($this->changedItems as $index => $item) {
$stats['changed'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories);
}
$this->filteredItems[$r['owner']] = $stats;
$this->filteredItems[$user] = $stats;
}
}
}

View file

@ -8,6 +8,9 @@ namespace JKingWeb\Arsse\Rule;
abstract class Rule {
public static function prep(string $pattern): string {
if (!strlen($pattern)) {
return "";
}
if (preg_match_all("<`>", $pattern, $m, \PREG_OFFSET_CAPTURE)) {
// where necessary escape our chosen delimiter (backtick) in reverse order
foreach (array_reverse($m[0]) as [,$pos]) {
@ -42,7 +45,13 @@ abstract class Rule {
*
* Returns true if the article is to be kept, and false if it is to be suppressed
*/
public static function apply(string $keepRule, string $blockRule, string $title, array $categories = []): bool {
public static function apply(string $keepPattern, string $blockPattern, string $title, array $categories = []): bool {
// ensure input is valid
assert(!strlen($keepPattern) || @preg_match($keepPattern, "") !== false, new \Exception("Keep pattern is invalid"));
assert(!strlen($blockPattern) || @preg_match($blockPattern, "") !== false, new \Exception("Block pattern is invalid"));
assert(sizeof(array_filter($categories, function($v) {
return !is_string($v);
})) === 0, new \Exception("All categories must be strings"));
// if neither rule is processed we should keep
$keep = true;
// merge and clean the data to match
@ -50,38 +59,24 @@ abstract class Rule {
return preg_replace('/\s+/', " ", $str);
}, array_merge([$title], $categories));
// process the keep rule if it exists
if (strlen($keepRule)) {
try {
$rule = static::prep($keepRule);
} catch (Exception $e) {
return true;
}
if (strlen($keepPattern)) {
// if a keep rule is specified the default state is now not to keep
$keep = false;
foreach ($data as $str) {
if (is_string($str)) {
if (preg_match($rule, $str)) {
// keep if the keep-rule matches one of the strings
$keep = true;
break;
}
if (preg_match($keepPattern, $str)) {
// keep if the keep-rule matches one of the strings
$keep = true;
break;
}
}
}
// process the block rule if the keep rule was matched
if ($keep && strlen($blockRule)) {
try {
$rule = static::prep($blockRule);
} catch (Exception $e) {
return true;
}
if ($keep && strlen($blockPattern)) {
foreach ($data as $str) {
if (is_string($str)) {
if (preg_match($rule, $str)) {
// do not keep if the block-rule matches one of the strings
$keep = false;
break;
}
if (preg_match($blockPattern, $str)) {
// do not keep if the block-rule matches one of the strings
$keep = false;
break;
}
}
}

View file

@ -76,9 +76,9 @@ trait SeriesFeed {
],
'rows' => [
[1,'john.doe@example.com',1,null,'^Sport$'],
[2,'john.doe@example.com',2,null,null],
[2,'john.doe@example.com',2,"",null],
[3,'john.doe@example.com',3,'\w+',null],
[4,'john.doe@example.com',4,null,null],
[4,'john.doe@example.com',4,'\w+',"["], // invalid rule leads to both rules being ignored
[5,'john.doe@example.com',5,null,'and/or'],
[6,'jane.doe@example.com',1,'^(?i)[a-z]+','bluberry'],
],
@ -205,16 +205,16 @@ trait SeriesFeed {
/** @dataProvider provideFilterRules */
public function testGetRules(int $in, array $exp): void {
$this->assertResult($exp, Arsse::$db->feedRulesGet($in));
$this->assertSame($exp, Arsse::$db->feedRulesGet($in));
}
public function provideFilterRules(): iterable {
return [
[1, [['owner' => "john.doe@example.com", 'keep' => "", 'block' => "^Sport$"], ['owner' => "jane.doe@example.com", 'keep' => "^(?i)[a-z]+", 'block' => "bluberry"]]],
[1, ['jane.doe@example.com' => ['keep' => "`^(?i)[a-z]+`u", 'block' => "`bluberry`u"], 'john.doe@example.com' => ['keep' => "", 'block' => "`^Sport$`u"]]],
[2, []],
[3, [['owner' => "john.doe@example.com", 'keep' => '\w+', 'block' => ""]]],
[3, ['john.doe@example.com' => ['keep' => '`\w+`u', 'block' => ""]]],
[4, []],
[5, [['owner' => "john.doe@example.com", 'keep' => "", 'block' => "and/or"]]],
[5, ['john.doe@example.com' => ['keep' => "", 'block' => "`and/or`u"]]],
];
}

View file

@ -95,7 +95,7 @@ class TestFeed extends \JKingWeb\Arsse\Test\AbstractTest {
self::clearData();
self::setConf();
Arsse::$db = \Phake::mock(Database::class);
\Phake::when(Arsse::$db)->feedRulesGet->thenReturn(new Result([]));
\Phake::when(Arsse::$db)->feedRulesGet->thenReturn([]);
}
public function testParseAFeed(): void {

View file

@ -23,8 +23,15 @@ class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
Rule::prep("[");
}
public function testPrepareAnEmptyPattern(): void {
$this->assertTrue(Rule::validate(""));
$this->assertSame("", Rule::prep(""));
}
/** @dataProvider provideApplications */
public function testApplyRules(string $keepRule, string $blockRule, string $title, array $categories, $exp): void {
$keepRule = Rule::prep($keepRule);
$blockRule = Rule::prep($blockRule);
if ($exp instanceof \Exception) {
$this->assertException($exp);
Rule::apply($keepRule, $blockRule, $title, $categories);
@ -43,8 +50,6 @@ class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
["", "^Category$", "Title", ["Dummy", "Category"], false],
["", "^Naught$", "Title", ["Dummy", "Category"], true],
["^Category$", "^Category$", "Title", ["Dummy", "Category"], false],
["[", "", "Title", ["Dummy", "Category"], true],
["", "[", "Title", ["Dummy", "Category"], true],
["", "^A B C$", "A B\nC", ["X\n Y \t \r Z"], false],
["", "^X Y Z$", "A B\nC", ["X\n Y \t \r Z"], false],
];