Browse Source

Separate directives into processing stages

master
J. King 2 years ago
parent
commit
06601b02e6
  1. 43
      lib/SitePattern.php

43
lib/SitePattern.php

@ -7,17 +7,14 @@ declare(strict_types=1);
namespace MensBeam\FullText; namespace MensBeam\FullText;
class SitePattern { class SitePattern {
public $title = []; public $preParse = [];
public $body = []; public $cleanUp = [];
public $date = []; public $readOut = [];
public $author = [];
public $strip = [];
public $single_page_link = [];
public $single_page_link_in_feed = [];
public $next_page_link = [];
public function __construct(string $rules) { public function __construct(string $rules) {
$lines = preg_split('/\r\n?|\n/', $rules); $lines = preg_split('/\r\n?|\n/', $rules);
$findString = null;
$findStringLine = 0;
foreach ($lines as $l => $d) { foreach ($lines as $l => $d) {
if (preg_match('/^#|^\s*$/', $d)) { if (preg_match('/^#|^\s*$/', $d)) {
// the line contains no data // the line contains no data
@ -94,29 +91,45 @@ class SitePattern {
case "single_page_link": case "single_page_link":
case "single_page_link_in_feed": case "single_page_link_in_feed":
case "next_page_link": case "next_page_link":
$this->$directive[] = $value; $stage = ($directive === "strip") ? "cleanUp" : "readOut";
// TODO: evaluate the XPathexpression to ensure syntactic validity // TODO: evaluate the XPath expression to ensure syntactic validity
$this->$stage[] = [$directive, [$value]];
break; break;
case "strip_id_or_class": case "strip_id_or_class":
case "strip_image_src": case "strip_image_src":
case "find_string": case "find_string":
// string values may be enclosed in single or double quotation marks, because leading and trailing whitespace is otherwise stripped
if ((preg_match('/^"/', $value) && preg_match('/"$/', $value)) || (preg_match("/^'/", $value) && preg_match("/'$/", $value))) { if ((preg_match('/^"/', $value) && preg_match('/"$/', $value)) || (preg_match("/^'/", $value) && preg_match("/'$/", $value))) {
$this->$directive[] = substr($value, 1, strlen($value) - 2); $value = substr($value, 1, strlen($value) - 2);
} else { } else {
$this->$directive[] = $value $value = $value;
} }
if ($directive === "find_string") {
// a "find_string" directive must be paired with a later "replace_string" directive, so one pending find_string is kept buffered
// encountering another find_string before the buffered one has been cleared by a replace_string is an error
if ($findString === null) {
$findString = $value;
$findStringLine = $l + 1;
} else {
throw new SitePatternException("\"find_string\" directive on line $findStringLine is not followed by a matching \"replace_string\" directive", 3);
}
} else {
$this->cleanUp[] = [$directive, [$value]];
}
break;
case "replace_sring": case "replace_sring":
# requires special handling to match up with find_string
case "http_header": case "http_header":
# string with param # string with param
case "tidy": case "tidy":
case "prune": case "prune":
case "atodetect_on_failure": case "atodetect_on_failure":
if ($value === "yes") { if ($value === "yes") {
$this->$directive = true; $value = true;
} elseif ($value === "no") { } elseif ($value === "no") {
$this->$directive = false; $value = false;
} else { } else {
throw new SitePatternException("Invalid value for boolean on line ".($l + 1), 1); throw new SitePatternException("Invalid value for boolean on line ".($l + 1), 2);
} }
break; break;
case "test_url": case "test_url":

Loading…
Cancel
Save