Browse Source

Separate directives into processing stages

master
J. King 2 years ago
parent
commit
06601b02e6
  1. 43
      lib/SitePattern.php

43
lib/SitePattern.php

@ -7,17 +7,14 @@ declare(strict_types=1);
namespace MensBeam\FullText;
class SitePattern {
public $title = [];
public $body = [];
public $date = [];
public $author = [];
public $strip = [];
public $single_page_link = [];
public $single_page_link_in_feed = [];
public $next_page_link = [];
public $preParse = [];
public $cleanUp = [];
public $readOut = [];
public function __construct(string $rules) {
$lines = preg_split('/\r\n?|\n/', $rules);
$findString = null;
$findStringLine = 0;
foreach ($lines as $l => $d) {
if (preg_match('/^#|^\s*$/', $d)) {
// the line contains no data
@ -94,29 +91,45 @@ class SitePattern {
case "single_page_link":
case "single_page_link_in_feed":
case "next_page_link":
$this->$directive[] = $value;
// TODO: evaluate the XPathexpression to ensure syntactic validity
$stage = ($directive === "strip") ? "cleanUp" : "readOut";
// TODO: evaluate the XPath expression to ensure syntactic validity
$this->$stage[] = [$directive, [$value]];
break;
case "strip_id_or_class":
case "strip_image_src":
case "find_string":
// string values may be enclosed in single or double quotation marks to preserve leading and trailing whitespace, which is otherwise stripped
if ((preg_match('/^"/', $value) && preg_match('/"$/', $value)) || (preg_match("/^'/", $value) && preg_match("/'$/", $value))) {
$this->$directive[] = substr($value, 1, strlen($value) - 2);
$value = substr($value, 1, strlen($value) - 2);
} else {
$this->$directive[] = $value
$value = $value;
}
if ($directive === "find_string") {
// the "find_string" directive must be matched to a later "replace_string" directive, so we keep one buffered
// a find_string which is not cleared by a replace_string is an error
if ($findString === null) {
$findString = $value;
$findStringLine = $l + 1;
} else {
throw new SitePatternException("\"find_string\" directive on line $findStringLine is not followed by a matching \"replace_string\" directive", 3);
}
} else {
$this->cleanUp[] = [$directive, [$value]];
}
break;
case "replace_sring":
# requires special handling to match up with find_string
case "http_header":
# string with param
case "tidy":
case "prune":
case "atodetect_on_failure":
if ($value === "yes") {
$this->$directive = true;
$value = true;
} elseif ($value === "no") {
$this->$directive = false;
$value = false;
} else {
throw new SitePatternException("Invalid value for boolean on line ".($l + 1), 1);
throw new SitePatternException("Invalid value for boolean on line ".($l + 1), 2);
}
break;
case "test_url":

Loading…
Cancel
Save