You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.5 KiB
48 lines
1.5 KiB
2 years ago
|
<?php
|
||
|
/** @license MIT
|
||
|
* Copyright 2021, J. King
|
||
|
* See LICENSE and AUTHORS files for details */
|
||
|
|
||
|
declare(strict_types=1);
|
||
|
namespace MensBeam\FullText;
|
||
|
|
||
|
/**
 * Parser for a textual set of site-pattern rules.
 *
 * The rules are supplied as a single string with one directive per line,
 * in the form `directive: value` or `directive(param): value`.
 *
 * NOTE(review): the directive names look like those of the Full-Text RSS
 * site-configuration format; confirm against that format's documentation
 * before adding handling for each directive.
 */
class SitePattern {
    /**
     * Parses the supplied rules, validating the syntax of each line.
     *
     * Lines beginning with "#" and lines which are empty or consist only of
     * whitespace are skipped. Directive names are recognized and grouped by
     * the kind of value they take, but no action is taken for any of them
     * yet: every case currently falls through to the end of the switch.
     *
     * @param string $rules The complete rule text; lines may be terminated by CRLF, CR, or LF
     *
     * @throws SitePatternException if a line containing data is not a well-formed directive; the message reports the one-based line number
     */
    public function __construct(string $rules) {
        // accept any of the three common line-ending conventions
        $lines = preg_split('/\r\n?|\n/', $rules);
        foreach ($lines as $l => $d) {
            if (preg_match('/^#|^\s*$/', $d)) {
                // the line contains no data
                continue;
            } elseif (!preg_match('/^([a-z_]+)(?:\(([a-zA-Z0-9\-_]+)\))?:\s*(.+)$/', $d, $m)) {
                // array keys are zero-based, but line numbers are reported one-based
                throw new SitePatternException("Malformed data on line ".($l + 1), 1);
            }
            // $param is an empty string when the directive has no parenthesized parameter
            [, $directive, $param, $value] = $m;
            switch ($directive) {
                case "title":
                case "body":
                case "date":
                case "author":
                case "strip":
                case "single_page_link":
                case "single_page_link_in_feed":
                case "next_page_link":
                    # XPath
                case "strip_id_or_class":
                case "strip_image_src":
                case "find_string":
                    # string
                case "replace_string":
                case "http_header":
                    # string with param
                case "tidy":
                case "prune":
                case "autodetect_on_failure":
                    # yes/no boolean
                case "test_url":
                    # URL
                default:
                    # Unsupported directive
            }
        }
    }
}
|