diff --git a/lib/REST/TinyTinyRSS/Search.php b/lib/REST/TinyTinyRSS/Search.php
new file mode 100644
index 0000000..4ff634b
--- /dev/null
+++ b/lib/REST/TinyTinyRSS/Search.php
@@ -0,0 +1,361 @@
+        "unread",
+        "star" => "starred",
+        "note" => "annotated",
+        "pub" => "published", // TODO: not implemented
+    ];
+    const FIELDS_TEXT = [
+        "title" => "titleTerms",
+        "author" => "authorTerms",
+        "note" => "annotationTerms",
+        "" => "searchTerms",
+    ];
+
+    /**
+     * Parses a search string and applies each token it contains to a Context
+     *
+     * The input is lower-cased and runs of whitespace are collapsed to single spaces before tokenization.
+     * Tokens are separated by spaces unless enclosed in double quotes; inside a quoted token a double quote
+     * may be written as "" or \". A leading "-" negates the token, a leading "@" marks the token as a date,
+     * and "tag:value" associates the value with a tag. Each finished token is handed to processToken().
+     *
+     * @param string $search The search string to parse
+     * @param Context|null $context A context to add the search constraints to; a new Context is created when omitted
+     * @return Context|null The resulting context, or null if an Exception was caught while processing a token
+     */
+    public static function parse(string $search, Context $context = null) {
+        // normalize the input
+        $search = strtolower(trim(preg_replace("<\s+>", " ", $search)));
+        // set initial state
+        $pos = -1;
+        $stop = strlen($search);
+        $state = self::STATE_BEFORE_TOKEN;
+        $buffer = "";
+        $tag = "";
+        $flag_negative = false;
+        $context = $context ?? new Context;
+        // process
+        try {
+            // the loop deliberately runs one position past the end of the string:
+            // at that position the character is an empty string, which each state treats as end of input
+            while (++$pos <= $stop) {
+                $char = ($pos < $stop) ? $search[$pos] : "";
+                // note: "continue 3" leaves both switch statements and advances the while loop,
+                // because PHP counts a switch as a looping structure for the purposes of continue
+                switch ($state) {
+                    case self::STATE_BEFORE_TOKEN:
+                        // at the start of a bare token: look for the negation, quote, date, and tag markers
+                        switch ($char) {
+                            case "":
+                                continue 3;
+                            case " ":
+                                continue 3;
+                            case '"':
+                                if ($flag_negative) {
+                                    // a quote after a negation marker is taken literally
+                                    $buffer .= $char;
+                                    $state = self::STATE_IN_TOKEN_OR_TAG;
+                                } else {
+                                    $state = self::STATE_BEFORE_TOKEN_QUOTED;
+                                }
+                                continue 3;
+                            case "-":
+                                if (!$flag_negative) {
+                                    $flag_negative = true;
+                                } else {
+                                    // a second hyphen is part of the token itself
+                                    $buffer .= $char;
+                                    $state = self::STATE_IN_TOKEN_OR_TAG;
+                                }
+                                continue 3;
+                            case "@":
+                                $state = self::STATE_IN_DATE;
+                                continue 3;
+                            case ":":
+                                $state = self::STATE_IN_TOKEN;
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                $state = self::STATE_IN_TOKEN_OR_TAG;
+                                continue 3;
+                        }
+                    case self::STATE_BEFORE_TOKEN_QUOTED:
+                        // immediately after an opening quote: the same markers apply as for a bare token
+                        switch ($char) {
+                            case "":
+                                continue 3;
+                            case '"':
+                                if (($pos + 1 == $stop) || $search[$pos + 1] === " ") {
+                                    // closing quote at the end of the token
+                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
+                                    $state = self::STATE_BEFORE_TOKEN;
+                                    $flag_negative = false;
+                                    $buffer = $tag = "";
+                                } elseif ($search[$pos + 1] === '"') {
+                                    // doubled quote: a literal quote character
+                                    $buffer .= '"';
+                                    $pos++;
+                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
+                                } else {
+                                    // quote followed by more text: the remainder is read as a bare token
+                                    $state = self::STATE_IN_TOKEN_OR_TAG;
+                                }
+                                continue 3;
+                            case "\\":
+                                if ($pos + 1 == $stop) {
+                                    $buffer .= $char;
+                                } elseif ($search[$pos + 1] === '"') {
+                                    // backslash-escaped quote: a literal quote character
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $buffer .= $char;
+                                }
+                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
+                                continue 3;
+                            case "-":
+                                if (!$flag_negative) {
+                                    $flag_negative = true;
+                                } else {
+                                    $buffer .= $char;
+                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
+                                }
+                                continue 3;
+                            case "@":
+                                $state = self::STATE_IN_DATE_QUOTED;
+                                continue 3;
+                            case ":":
+                                $state = self::STATE_IN_TOKEN_QUOTED;
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
+                                continue 3;
+                        }
+                    case self::STATE_IN_DATE:
+                        // bare date: consume everything up to the next space or the end of input
+                        while ($pos < $stop && $search[$pos] !== " ") {
+                            $buffer .= $search[$pos++];
+                        }
+                        $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
+                        $state = self::STATE_BEFORE_TOKEN;
+                        $flag_negative = false;
+                        $buffer = $tag = "";
+                        continue 2;
+                    case self::STATE_IN_DATE_QUOTED:
+                        switch ($char) {
+                            case "":
+                            case '"':
+                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
+                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
+                                    $state = self::STATE_BEFORE_TOKEN;
+                                    $flag_negative = false;
+                                    $buffer = $tag = "";
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $state = self::STATE_IN_DATE;
+                                }
+                                continue 3;
+                            case "\\":
+                                if ($pos + 1 == $stop) {
+                                    $buffer .= $char;
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $buffer .= $char;
+                                }
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                continue 3;
+                        }
+                    case self::STATE_IN_TOKEN:
+                        // bare value following a colon: consume everything up to the next space or the end of input
+                        while ($pos < $stop && $search[$pos] !== " ") {
+                            $buffer .= $search[$pos++];
+                        }
+                        if (!strlen($tag)) {
+                            // the colon was not preceded by a tag, so it is part of the value
+                            $buffer = ":".$buffer;
+                        }
+                        $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
+                        $state = self::STATE_BEFORE_TOKEN;
+                        $flag_negative = false;
+                        $buffer = $tag = "";
+                        continue 2;
+                    case self::STATE_IN_TOKEN_QUOTED:
+                        switch ($char) {
+                            case "":
+                            case '"':
+                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
+                                    if (!strlen($tag)) {
+                                        // the colon was not preceded by a tag, so it is part of the value
+                                        $buffer = ":".$buffer;
+                                    }
+                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
+                                    $state = self::STATE_BEFORE_TOKEN;
+                                    $flag_negative = false;
+                                    $buffer = $tag = "";
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $state = self::STATE_IN_TOKEN;
+                                }
+                                continue 3;
+                            case "\\":
+                                if ($pos + 1 == $stop) {
+                                    $buffer .= $char;
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $buffer .= $char;
+                                }
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                continue 3;
+                        }
+                    case self::STATE_IN_TOKEN_OR_TAG:
+                        // bare text which becomes a tag if a colon is encountered, or a plain token otherwise
+                        switch ($char) {
+                            case "":
+                            case " ":
+                                $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
+                                $state = self::STATE_BEFORE_TOKEN;
+                                $flag_negative = false;
+                                $buffer = $tag = "";
+                                continue 3;
+                            case ":":
+                                $tag = $buffer;
+                                $buffer = "";
+                                $state = self::STATE_IN_TOKEN;
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                continue 3;
+                        }
+                    case self::STATE_IN_TOKEN_OR_TAG_QUOTED:
+                        switch ($char) {
+                            case "":
+                            case '"':
+                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
+                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
+                                    $state = self::STATE_BEFORE_TOKEN;
+                                    $flag_negative = false;
+                                    $buffer = $tag = "";
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $state = self::STATE_IN_TOKEN_OR_TAG;
+                                }
+                                continue 3;
+                            case "\\":
+                                if ($pos + 1 == $stop) {
+                                    $buffer .= $char;
+                                } elseif ($search[$pos + 1] === '"') {
+                                    $buffer .= '"';
+                                    $pos++;
+                                } else {
+                                    $buffer .= $char;
+                                }
+                                continue 3;
+                            case ":":
+                                $tag = $buffer;
+                                $buffer = "";
+                                $state = self::STATE_IN_TOKEN_QUOTED;
+                                continue 3;
+                            default:
+                                $buffer .= $char;
+                                continue 3;
+                        }
+                    default:
+                        throw new \Exception; // @codeCoverageIgnore
+                }
+            }
+        } catch (Exception $e) {
+            // a token could not be applied; the search as a whole is reported as unusable
+            return null;
+        }
+        return $context;
+    }
+
+    protected
static function processToken(Context $c, string $value, string $tag, bool $neg, bool $date): Context {
+        // dispatches one finished token to the handler matching its kind: date, boolean tag, or text term
+        // $value is the token text, $tag the text before the colon (if any), $neg whether the token was negated,
+        // and $date whether the token was introduced with the date marker
+        if (!strlen($value) && !strlen($tag)) {
+            // nothing was collected; the context is returned untouched
+            return $c;
+        } elseif (!strlen($value)) {
+            // if a tag has an empty value, the tag is treated as a search term instead
+            $value = "$tag:";
+            $tag = "";
+        }
+        if ($date) {
+            return self::setDate($value, $c, $neg);
+        } elseif (isset(self::FIELDS_BOOLEAN[$tag])) {
+            return self::setBoolean($tag, $value, $c, $neg);
+        } else {
+            return self::addTerm($tag, $value, $c, $neg);
+        }
+    }
+
+    /**
+     * Appends a text term to the term list selected by its tag
+     *
+     * A tag without an entry in FIELDS_TEXT is folded back into the value as "tag:value" and the result
+     * is added to the list named by the blank-tag entry. Negated terms are applied to the context's "not" member.
+     *
+     * @param string $tag The tag of the token; may be an empty string
+     * @param string $value The text of the term
+     * @param Context $c The context to modify
+     * @param bool $neg Whether the term was negated
+     */
+    protected static function addTerm(string $tag, string $value, Context $c, bool $neg): Context {
+        $c = $neg ? $c->not : $c;
+        $type = self::FIELDS_TEXT[$tag] ?? "";
+        if (!$type) {
+            $value = "$tag:$value";
+            $type = self::FIELDS_TEXT[""];
+        }
+        // the property named by $type is read for the existing list, and the method of the same name is called with the extended list
+        return $c->$type(array_merge($c->$type ?? [], [$value]));
+    }
+
+    /**
+     * Restricts the context to the single day denoted by a date token
+     *
+     * The day is expressed as a modifiedSince/notModifiedSince pair running from 00:00:00 to 23:59:59 with a +00:00 offset.
+     * NOTE(review): Date::normalize() is defined elsewhere; this method only relies on it returning a falsy value
+     * for input it cannot interpret and an object with a format() method otherwise -- confirm against its definition
+     *
+     * @param string $value The text of the date token
+     * @param Context $c The context to modify
+     * @param bool $neg Whether the token was negated, in which case the context's "not" member is modified instead
+     * @throws Exception if the context already has a date range set which differs from the requested one
+     */
+    protected static function setDate(string $value, Context $c, bool $neg): Context {
+        $spec = Date::normalize($value);
+        // TTRSS treats invalid dates as the start of the Unix epoch; we ignore them instead
+        if (!$spec) {
+            return $c;
+        }
+        $day = $spec->format("Y-m-d");
+        $start = $day."T00:00:00+00:00";
+        $end = $day."T23:59:59+00:00";
+        // if a date is already set, the same date is a no-op; anything else is a contradiction
+        $cc = $neg ? $c->not : $c;
+        if ($cc->modifiedSince() || $cc->notModifiedSince()) {
+            if (!$cc->modifiedSince() || !$cc->notModifiedSince() || $cc->modifiedSince->format("c") !== $start || $cc->notModifiedSince->format("c") !== $end) {
+                // FIXME: multiple negative dates should be allowed, but the design of the Context class does not support this
+                throw new Exception;
+            } else {
+                return $c;
+            }
+        }
+        // the range is set on $cc, but the context passed in is what gets returned
+        $cc->modifiedSince($start);
+        $cc->notModifiedSince($end);
+        return $c;
+    }
+
+    /**
+     * Applies a boolean tag (one listed in FIELDS_BOOLEAN) to the context
+     *
+     * Values other than "true" and "false" are not booleans and are passed on to addTerm() instead.
+     * Negation is applied by inverting the boolean itself; the context's "not" member is not used here.
+     *
+     * @param string $tag The tag of the token
+     * @param string $value The text following the tag
+     * @param Context $c The context to modify
+     * @param bool $neg Whether the token was negated
+     * @throws Exception if the "pub" tag resolves to true, or if the field is already set to the opposite value
+     */
+    protected static function setBoolean(string $tag, string $value, Context $c, bool $neg): Context {
+        // map the two recognized literals to booleans; anything else yields null
+        $set = ["true" => true, "false" => false][$value] ?? null;
+        if (is_null($set)) {
+            return self::addTerm($tag, $value, $c, $neg);
+        } else {
+            // apply negation
+            $set = $neg ? !$set : $set;
+            if ($tag === "pub") {
+                // TODO: this needs to be implemented correctly if the Published feed is implemented
+                // currently specifying true will always yield an empty result (nothing is ever published), and specifying false is a no-op (matches everything)
+                if ($set) {
+                    throw new Exception;
+                } else {
+                    return $c;
+                }
+            } else {
+                $field = (self::FIELDS_BOOLEAN[$tag] ?? "");
+                if (!$c->$field()) {
+                    // field has not yet been set; set it
+                    return $c->$field($set);
+                } elseif ($c->$field == $set) {
+                    // field is already set to same value; do nothing
+                    return $c;
+                } else {
+                    // contradiction: query would return no results
+                    throw new Exception;
+                }
+            }
+        }
+    }
+}
diff --git a/tests/cases/REST/TinyTinyRSS/TestSearch.php b/tests/cases/REST/TinyTinyRSS/TestSearch.php
new file mode 100644
index 0000000..62ad553
--- /dev/null
+++ b/tests/cases/REST/TinyTinyRSS/TestSearch.php
@@ -0,0 +1,126 @@
+            ["", new Context],
+            'Whitespace only' => [" \n \t", new Context],
+            'Simple bare token' => ['OOK', (new Context)->searchTerms(["ook"])],
+            'Simple negative bare token' => ['-OOK', (new Context)->not->searchTerms(["ook"])],
+            'Simple quoted token' => ['"OOK eek"', (new Context)->searchTerms(["ook eek"])],
+            'Simple negative quoted token' => ['"-OOK eek"', (new Context)->not->searchTerms(["ook eek"])],
+            'Simple bare tokens' => ['OOK eek', (new Context)->searchTerms(["ook", "eek"])],
+            'Simple mixed bare tokens' => ['-OOK eek', (new Context)->not->searchTerms(["ook"])->searchTerms(["eek"])],
+            'Unclosed quoted token' => ['"OOK eek', (new Context)->searchTerms(["ook eek"])],
+            'Unclosed quoted token 2' => ['"OOK eek" "', (new Context)->searchTerms(["ook eek"])],
+            'Broken quoted token 1' => ['"-OOK"eek"', (new Context)->not->searchTerms(["ookeek\""])],
+            'Broken quoted token 2' => ['""eek"', (new Context)->searchTerms(["eek\""])],
+            'Broken quoted token 3' => ['"-"eek"', (new Context)->not->searchTerms(["eek\""])],
+            'Empty quoted token' => ['""', new Context],
+            'Simple quoted tokens' => ['"OOK eek" "eek ack"', (new Context)->searchTerms(["ook eek", "eek ack"])],
+            'Bare blank tag' => [':ook', (new Context)->searchTerms([":ook"])],
+            'Quoted blank tag' => ['":ook"', (new Context)->searchTerms([":ook"])],
+            'Bare negative blank tag' => ['-:ook', (new Context)->not->searchTerms([":ook"])],
+            'Quoted negative blank tag' => ['"-:ook"', (new Context)->not->searchTerms([":ook"])],
+            'Bare valueless blank tag' => [':', (new Context)->searchTerms([":"])],
+            'Quoted valueless blank tag' => ['":"', (new Context)->searchTerms([":"])],
+            'Bare negative valueless blank tag' => ['-:', (new Context)->not->searchTerms([":"])],
+            'Quoted negative valueless blank tag' => ['"-:"', (new Context)->not->searchTerms([":"])],
+            'Double negative' => ['--eek', (new Context)->not->searchTerms(["-eek"])],
+            'Double negative 2' => ['--@eek', (new Context)->not->searchTerms(["-@eek"])],
+            'Double negative 3' => ['"--@eek"', (new Context)->not->searchTerms(["-@eek"])],
+            'Double negative 4' => ['"--eek"', (new Context)->not->searchTerms(["-eek"])],
+            'Negative before quote' => ['-"ook"', (new Context)->not->searchTerms(["\"ook\""])],
+            'Bare unread tag true' => ['UNREAD:true', (new Context)->unread(true)],
+            'Bare unread tag false' => ['UNREAD:false', (new Context)->unread(false)],
+            'Bare negative unread tag true' => ['-unread:true', (new Context)->unread(false)],
+            'Bare negative unread tag false' => ['-unread:false', (new Context)->unread(true)],
+            'Quoted unread tag true' => ['"UNREAD:true"', (new Context)->unread(true)],
+            'Quoted unread tag false' => ['"UNREAD:false"', (new Context)->unread(false)],
+            'Quoted negative unread tag true' => ['"-unread:true"', (new Context)->unread(false)],
+            'Quoted negative unread tag false' => ['"-unread:false"', (new Context)->unread(true)],
+            'Bare star tag true' => ['STAR:true', (new
Context)->starred(true)], + 'Bare star tag false' => ['STAR:false', (new Context)->starred(false)], + 'Bare negative star tag true' => ['-star:true', (new Context)->starred(false)], + 'Bare negative star tag false' => ['-star:false', (new Context)->starred(true)], + 'Quoted star tag true' => ['"STAR:true"', (new Context)->starred(true)], + 'Quoted star tag false' => ['"STAR:false"', (new Context)->starred(false)], + 'Quoted negative star tag true' => ['"-star:true"', (new Context)->starred(false)], + 'Quoted negative star tag false' => ['"-star:false"', (new Context)->starred(true)], + 'Bare note tag true' => ['NOTE:true', (new Context)->annotated(true)], + 'Bare note tag false' => ['NOTE:false', (new Context)->annotated(false)], + 'Bare negative note tag true' => ['-note:true', (new Context)->annotated(false)], + 'Bare negative note tag false' => ['-note:false', (new Context)->annotated(true)], + 'Quoted note tag true' => ['"NOTE:true"', (new Context)->annotated(true)], + 'Quoted note tag false' => ['"NOTE:false"', (new Context)->annotated(false)], + 'Quoted negative note tag true' => ['"-note:true"', (new Context)->annotated(false)], + 'Quoted negative note tag false' => ['"-note:false"', (new Context)->annotated(true)], + 'Bare pub tag true' => ['PUB:true', null], + 'Bare pub tag false' => ['PUB:false', new Context], + 'Bare negative pub tag true' => ['-pub:true', new Context], + 'Bare negative pub tag false' => ['-pub:false', null], + 'Quoted pub tag true' => ['"PUB:true"', null], + 'Quoted pub tag false' => ['"PUB:false"', new Context], + 'Quoted negative pub tag true' => ['"-pub:true"', new Context], + 'Quoted negative pub tag false' => ['"-pub:false"', null], + 'Non-boolean unread tag' => ['unread:maybe', (new Context)->searchTerms(["unread:maybe"])], + 'Non-boolean star tag' => ['star:maybe', (new Context)->searchTerms(["star:maybe"])], + 'Non-boolean pub tag' => ['pub:maybe', (new Context)->searchTerms(["pub:maybe"])], + 'Non-boolean note tag' => 
['note:maybe', (new Context)->annotationTerms(["maybe"])], + 'Valueless unread tag' => ['unread:', (new Context)->searchTerms(["unread:"])], + 'Valueless star tag' => ['star:', (new Context)->searchTerms(["star:"])], + 'Valueless pub tag' => ['pub:', (new Context)->searchTerms(["pub:"])], + 'Valueless note tag' => ['note:', (new Context)->searchTerms(["note:"])], + 'Valueless title tag' => ['title:', (new Context)->searchTerms(["title:"])], + 'Valueless author tag' => ['author:', (new Context)->searchTerms(["author:"])], + 'Escaped quote 1' => ['"""I say, Jeeves!"""', (new Context)->searchTerms(["\"i say, jeeves!\""])], + 'Escaped quote 2' => ['"\\"I say, Jeeves!\\""', (new Context)->searchTerms(["\"i say, jeeves!\""])], + 'Escaped quote 3' => ['\\"I say, Jeeves!\\"', (new Context)->searchTerms(["\\\"i", "say,", "jeeves!\\\""])], + 'Escaped quote 4' => ['"\\"\\I say, Jeeves!\\""', (new Context)->searchTerms(["\"\\i say, jeeves!\""])], + 'Escaped quote 5' => ['"\\I say, Jeeves!"', (new Context)->searchTerms(["\\i say, jeeves!"])], + 'Escaped quote 6' => ['"\\"I say, Jeeves!\\', (new Context)->searchTerms(["\"i say, jeeves!\\"])], + 'Escaped quote 7' => ['"\\', (new Context)->searchTerms(["\\"])], + 'Quoted author tag 1' => ['"author:Neal Stephenson"', (new Context)->authorTerms(["neal stephenson"])], + 'Quoted author tag 2' => ['"author:Jo ""Cap\'n Tripps"" Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\" ashburn"])], + 'Quoted author tag 3' => ['"author:Jo \\"Cap\'n Tripps\\" Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\" ashburn"])], + 'Quoted author tag 4' => ['"author:Jo ""Cap\'n Tripps"Ashburn"', (new Context)->authorTerms(["jo \"cap'n trippsashburn\""])], + 'Quoted author tag 5' => ['"author:Jo ""Cap\'n Tripps\ Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\\ ashburn"])], + 'Quoted author tag 6' => ['"author:Neal Stephenson\\', (new Context)->authorTerms(["neal stephenson\\"])], + 'Quoted title tag' => ['"title:Generic 
title"', (new Context)->titleTerms(["generic title"])],
+            'Contradictory booleans' => ['unread:true -unread:true', null],
+            'Doubled boolean' => ['unread:true unread:true', (new Context)->unread(true)],
+            'Bare blank date' => ['@', new Context],
+            'Quoted blank date' => ['"@"', new Context],
+            'Bare ISO date' => ['@2019-03-01', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
+            'Quoted English date' => ['"@March 1st, 2019"', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
+            'Bare negative ISO date' => ['-@2019-03-01', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
+            'Quoted negative English date' => ['"-@March 1st, 2019"', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
+            'Invalid date' => ['@Bugaboo', new Context],
+            'Escaped quoted date 1' => ['"@""Yesterday" and today', (new Context)->searchTerms(["and", "today"])],
+            'Escaped quoted date 2' => ['"@\\"Yesterday" and today', (new Context)->searchTerms(["and", "today"])],
+            'Escaped quoted date 3' => ['"@Yesterday\\', new Context],
+            'Escaped quoted date 4' => ['"@Yesterday\\and today', new Context],
+            'Escaped quoted date 5' => ['"@Yesterday"and today', (new Context)->searchTerms(["today"])],
+            'Contradictory dates' => ['@Yesterday @Today', null],
+            'Doubled date' => ['"@March 1st, 2019" @2019-03-01', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
+            'Doubled negative date' => ['"-@March 1st, 2019" -@2019-03-01', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
+        ];
+    }
+
+    /**
+     * Checks that parsing a search string produces the expected context, or null where null is expected
+     *
+     * @dataProvider provideSearchStrings
+     */
+    public function testApplySearchToContext(string $search, $exp) {
+        $act = Search::parse($search);
+        $this->assertEquals($exp, $act);
+    }
+}
diff --git
a/tests/phpunit.xml b/tests/phpunit.xml index 65a0893..aac033b 100644 --- a/tests/phpunit.xml +++ b/tests/phpunit.xml @@ -99,6 +99,7 @@ cases/REST/NextCloudNews/PDO/TestV1_2.php + cases/REST/TinyTinyRSS/TestSearch.php cases/REST/TinyTinyRSS/TestAPI.php cases/REST/TinyTinyRSS/TestIcon.php cases/REST/TinyTinyRSS/PDO/TestAPI.php