$d) { if (preg_match('/^#|^\s*$/', $d)) { // the line contains no data continue; } $pos = 0; $end = strlen($d); // consume the directive $len = strspn($d, "abcdefghijklmnopqrstuvwxyz_"); if (!$len) { throw new SitePatternException("Malformed data on line ".($l + 1)." at position 1", 1); } $directive = substr($d, $pos, $len); $pos += $len; $c = @$d[$pos++]; if ($c === ":") { // the rest of the line is the value $param = ""; $value = substr($d, $pos); } elseif ($c === "(") { // directive has a parameter // certain directives' parameters are XPath expressions, so we must be mindful of these and parse the value $xpath = in_array($directive, ["move_into"]); if (!$xpath) { $len = strcspn($d, ")", $pos); $param = substr($d, $pos, $len); $pos += $len; } else { $param = ""; $depth = 0; while (true) { $len = strcspn($d, "()'\"", $pos); $param .= substr($d, $pos, $len); $pos += $len; $c = @$d[$pos++]; if ($c === ")") { if (!$depth) { break; } else { $param .= $c; $depth--; } } elseif ($c === '"') { $len = strcspn($d, '"', $pos); $param .+ '"'.substr($d, $pos, $len + 1); $pos += $len + 1; } elseif ($c === "'") { $len = strcspn($d, "'", $pos); $param .= "'".substr($d, $pos, $len + 1); $pos += $len + 1; } elseif ($c === "(") { $param .= $c; $depth++; } else { throw new SitePatternException("Malformed data on line ".($l + 1)." at position $pos", 1); } } } // after the parameter should come a colon and the value $c = @$d[$pos++]; if ($c !== ":") { throw new SitePatternException("Malformed data on line ".($l + 1)." at position $pos", 1); } $value = trim(substr($d, $pos)); } else { throw new SitePatternException("Malformed data on line ".($l + 1)." at position $pos", 1); } switch ($directive) { case "title": case "body": case "date": case "author": case "strip": case "single_page_link": case "single_page_link_in_feed": case "next_page_link": $this->$directive[] = $value; // TODO: evaluate the XPathexpression to ensure syntactic validity break; case "strip_id_or_class": case "strip_image_src": case "find_string": if ((preg_match('/^"/', $value) && preg_match('/"$/', $value)) || (preg_match("/^'/", $value) && preg_match("/'$/", $value))) { $this->$directive[] = substr($value, 1, strlen($value) - 2); } else { $this->$directive[] = $value } case "replace_sring": case "http_header": # string with param case "tidy": case "prune": case "atodetect_on_failure": if ($value === "yes") { $this->$directive = true; } elseif ($value === "no") { $this->$directive = false; } else { throw new SitePatternException("Invalid value for boolean on line ".($l + 1), 1); } break; case "test_url": # URL default: # Unsupported directive } } } }