Browse Source
Whilethis is simple, it will not work due to embedded XPath in parameters in some Instapaper directives which are not documented by FiveFilters. A nominally XPath-aware parser will be required.master
J. King
2 years ago
2 changed files with 58 additions and 0 deletions
@ -0,0 +1,48 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2021, J. King |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace MensBeam\FullText; |
|||
|
|||
/**
 * Line-oriented parser skeleton for site-pattern rule text.
 *
 * Each data line has the form "directive: value" or
 * "directive(param): value". At present the constructor only validates the
 * syntax of every line and classifies its directive; no directive is acted
 * upon and nothing is stored on the instance.
 */
class SitePattern {
    /**
     * Validates every line of a rule set.
     *
     * Lines that start with "#" and lines that are empty or contain only
     * whitespace carry no data and are skipped. Every other line must match
     * the directive syntax.
     *
     * @param string $rules The complete rule text; lines may be terminated by CRLF, CR, or LF
     *
     * @throws SitePatternException With code 1 when a data line is malformed; the message carries the 1-based line number
     */
    public function __construct(string $rules) {
        $lines = preg_split('/\r\n?|\n/', $rules);
        foreach ($lines as $l => $d) {
            if (preg_match('/^#|^\s*$/', $d)) {
                // the line contains no data
                continue;
            } elseif (!preg_match('/^([a-z_]+)(?:\(([a-zA-Z0-9\-_]+)\))?:\s*(.+)$/', $d, $m)) {
                // NOTE: the parameter sub-pattern accepts only ASCII letters,
                // digits, "-" and "_"; any other parameter text is rejected here
                throw new SitePatternException("Malformed data on line ".($l + 1), 1);
            }
            // $param is an empty string when the line has no "(param)" part;
            // $param and $value are captured for the directive handlers, which
            // are not implemented yet
            [, $directive, $param, $value] = $m;
            // Each group ends in an explicit break so that a handler added to
            // one group later cannot fall through into the next
            switch ($directive) {
                case "title":
                case "body":
                case "date":
                case "author":
                case "strip":
                case "single_page_link":
                case "single_page_link_in_feed":
                case "next_page_link":
                    # XPath
                    break;
                case "strip_id_or_class":
                case "strip_image_src":
                case "find_string":
                    # string
                    break;
                case "replace_string":
                case "http_header":
                    # string with param
                    break;
                case "tidy":
                case "prune":
                case "autodetect_on_failure":
                    # yes/no boolean
                    break;
                case "test_url":
                    # URL
                    break;
                default:
                    # Unsupported directive
                    break;
            }
        }
    }
}
@ -0,0 +1,10 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2021, J. King |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace MensBeam\FullText; |
|||
|
|||
/**
 * Exception type for site-pattern errors.
 *
 * SitePattern throws it (with code 1) when a rule line is malformed. It adds
 * nothing to \Exception beyond a distinct type to catch.
 */
class SitePatternException extends \Exception {
}
Loading…
Reference in new issue