Browse Source

Started playing around with a VSCode-style parser

main
Dustin Wilson 3 years ago
parent
commit
38504d13ce
  1. 405
      lib/Scope/Parser.php
  2. 40
      lib/Scope/Tokenizer.php

405
lib/Scope/Parser.php

@ -7,392 +7,113 @@ declare(strict_types=1);
namespace dW\Highlighter\Scope; namespace dW\Highlighter\Scope;
class Parser { class Parser {
public static $debug = false; protected string $token;
protected Tokenizer $tokenizer;
protected Data $data;
protected int $debugCount = 1;
protected array $lastExceptionData = [];
protected static Parser $instance; protected static Parser $instance;
protected function __construct(string $selector) { protected function __construct(string $selector) {
$this->data = new Data($selector); $this->tokenizer = new Tokenizer($selector);
} }
public static function parse(string $selector): Matcher|false { public static function parse(string $selector): array {
self::$instance = new self($selector); self::$instance = new self($selector);
$result = false; $result = [];
self::parseSpace(); while (self::$instance->token = self::$instance->tokenizer->next()) {
$s2 = self::parseSelector(); $priority = 0;
if ($s2 !== false) { if (strlen(self::$instance->token) === 2 && self::$instance->token[1] === ':') {
self::parseSpace(); switch (self::$instance->token[0]) {
$result = $s2; case 'R': $priority = 1;
} break;
case 'L': $priority = -1;
if ($result === false && self::$instance->lastExceptionData !== []) { break;
throw new Exception(self::$instance->lastExceptionData['expected'], self::$instance->lastExceptionData['found']); default: die('OOK!');
}
return $result;
}
/**
 * Parses a composite: an expression optionally followed by one of the
 * operators "|", "&" or "-" and a further composite.
 *
 * @return Matcher|false A CompositeMatcher when a left-hand expression, an
 *   operator and a right-hand composite were all parsed; otherwise the data
 *   is rewound to where this call started and the result of a plain
 *   parseExpression() is returned
 */
protected static function parseComposite(): Matcher|false {
    if (self::$debug === true) {
        self::debug();
    }

    // Remember the starting point so a failed attempt can be undone
    $start = self::$instance->data->position;
    $composite = false;

    $left = self::parseExpression();
    if ($left !== false) {
        self::parseSpace();
        $operator = self::$instance->data->consumeIf('|&-');
        if (!in_array($operator, [ '|', '&', '-' ])) {
            // No operator here; note what was expected for error reporting
            self::fail('|&-');
        } else {
            self::parseSpace();
            // The right-hand side is itself a composite, so operators chain
            $right = self::parseComposite();
            $composite = ($right !== false) ? new CompositeMatcher($left, $operator, $right) : false;
        }
    }

    if ($composite === false) {
        // Not a composite after all: rewind and treat it as a lone expression
        self::$instance->data->unconsumeTo($start);
        $composite = self::parseExpression();
    }

    if (self::$debug === true) {
        self::debugResult($composite);
    }

    return $composite;
}
protected static function parseExpression(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$s1 = self::$instance->data->consumeIf('-');
if ($s1 === '-') {
self::parseSpace();
$s3 = self::parseGroup();
if ($s3 !== false) {
self::parseSpace();
$result = new NegateMatcher($s3);
}
} else {
self::fail('-');
}
if ($result === false) {
$s1 = self::$instance->data->consumeIf('-');
if ($s1 === '' || $s1 === false) {
$s1 = false;
self::fail('-');
}
if ($s1 !== false) {
self::parseSpace();
$s3 = self::parsePath();
if ($s3 !== false) {
self::parseSpace();
$result = new NegateMatcher($s3);
} }
}
if ($result === false) { self::$instance->token = self::$instance->tokenizer->next();
$result = self::parseGroup(); if (self::$instance->token === false) {
if ($result === false) { break;
$result = self::parsePath();
} }
} }
}
if (self::$debug === true) { $matcher = self::parseConjunction();
self::debugResult($result); if ($matcher === false) {
} $matcher = self::parseOperand();
return $result;
}
protected static function parseGroup(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$prefix = null;
$position = self::$instance->data->position;
$s2 = self::$instance->data->consumeIf('LRB');
if ($s2 === '' || $s2 === false) {
$s2 = false;
self::fail('LRB');
}
if ($s2 !== false) {
$s3 = self::$instance->data->consumeIf(':');
if ($s3 === ':') {
$prefix = "$s2$s3";
} else {
self::fail(':');
} }
}
$s2 = self::$instance->data->consumeIf('(');
if ($s2 === '(') {
self::parseSpace();
$s4 = self::parseSelector();
if ($s4 !== false) {
self::parseSpace();
$s6 = self::$instance->data->consumeIf(')');
if ($s6 === '' || $s6 === false) {
$s6 = false;
self::fail(')');
}
if ($s6 !== false) {
$result = new GroupMatcher($prefix, $s4);
}
}
} else {
self::fail('(');
}
if ($result === false) { $result[] = [
self::$instance->data->unconsumeTo($position); 'matcher' => $matcher,
} 'priority' => $priority
];
if (self::$debug === true) { if (self::$instance->token !== ',') {
self::debugResult($result); break;
}
} }
return $result; return $result;
} }
protected static function parsePath(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$prefix = null;
$s2 = self::$instance->data->consumeIf('LRB');
if (in_array($s2, [ 'L', 'R', 'B' ])) {
$s3 = self::$instance->data->consumeIf(':');
if ($s3 === '' || $s3 === false) {
$s3 = false;
self::fail(':');
}
if ($s3 !== false) {
$prefix = "$s2$s3";
}
} else {
self::fail('LRB');
}
$s2 = self::parseScope();
if ($s2 !== false) {
$s3 = [$s2];
do {
$s6 = false;
self::parseSpace();
$s6 = self::parseScope();
if ($s6 !== false) {
$s3[] = $s6;
}
} while ($s6 !== false);
$result = new PathMatcher($prefix, ...$s3);
}
if (self::$debug === true) { protected static function parseConjunction(): AndMatcher|false {
self::debugResult($result); $matchers = [];
while ($matcher = self::parseOperand()) {
$matchers[] = $matcher;
} }
return $result; return (count($matchers) > 1) ? new AndMatcher($matchers[0], $matchers[1]) : false;
} }
protected static function parseSegment(): SegmentMatcher|TrueMatcher|false { protected static function parseInnerExpression(): Matcher|false {
if (self::$debug === true) { $matchers = [];
self::debug(); while ($matcher = self::parseConjunction()) {
} $matchers[] = $matcher;
if (self::$instance->token === '|' || self::$instance->token === ',') {
$result = false; do {
self::$instance->token = self::$instance->tokenizer->next();
self::parseSpace(); } while (self::$instance->token === '|' || self::$instance->token === ',');
$s2 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_');
if ($s2 === '' || $s2 === false) {
$s2 = false;
self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_');
}
if ($s2 !== false) {
$s3 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_');
if ($s3 === '' || $s2 === false) {
$s3 = false;
self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_');
} else { } else {
$s2 .= $s3; break;
} }
} }
if ($s2 !== false) { return (count($matchers) > 1) ? new OrMatcher($matchers[0], $matchers[1]) : false;
self::parseSpace();
$result = new SegmentMatcher($s2);
}
if ($result === false) {
self::parseSpace();
$s2 = self::$instance->data->consumeIf('*');
if ($s2 === '*') {
self::parseSpace();
$result = new TrueMatcher($s2);
} else {
self::fail('*');
}
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
} }
protected static function parseSelector(): Matcher|false { protected static function parseOperand(): Matcher|false {
if (self::$debug === true) { if (self::$instance->token === '-') {
self::debug(); self::$instance->token = self::$instance->tokenizer->next();
}
$result = false;
$position = self::$instance->data->position;
$s1 = self::parseComposite(); $matcher = self::parseOperand();
if ($s1 !== false) { if ($matcher === false) {
self::parseSpace(); die('OH SHIT');
$s3 = self::$instance->data->consumeIf(',');
if ($s3 === ',') {
self::parseSpace();
$s5 = self::parseSelector();
$result = ($s5 === false) ? $s1 : new OrMatcher($s1, $s5);
} else {
self::fail(',');
} }
}
if ($result === false) { return new NegateMatcher($matcher);
self::$instance->data->unconsumeTo($position);
$result = self::parseComposite();
} }
if (self::$debug === true) { if (self::$instance->token === '(') {
self::debugResult($result); self::$instance->token = self::$instance->tokenizer->next();
} $expressionInParents = self::parseInnerExpression();
if (self::$instance->token === ')') {
return $result; self::$instance->token = self::$instance->tokenizer->next();
} }
return $expressionInParents;
protected static function parseScope(): ScopeMatcher|false {
if (self::$debug === true) {
self::debug();
} }
$result = false; if (self::$instance->tokenizer->tokenIsIdentifier()) {
$identifiers = [];
$s1 = self::parseSegment();
if ($s1 !== false) {
$s2 = [$s1];
do { do {
$s3 = false; $identifiers[] = self::$instance->token;
self::$instance->token = self::$instance->tokenizer->next();
$s4 = self::$instance->data->consumeIf('.'); } while (self::$instance->tokenizer->tokenIsIdentifier());
if ($s4 === '.') {
$s3 = self::parseSegment();
if ($s3 !== false) {
$s2[] = $s3;
}
} else {
self::fail('.');
}
} while ($s3 !== false);
$result = new ScopeMatcher(...$s2);
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
}
protected static function parseSpace(): string|false {
if (self::$debug === true) {
self::debug();
}
$result = self::$instance->data->consumeWhile(" \t"); return new ScopeMatcher(...$identifiers);
if ($result === false) {
self::fail(" \t");
} }
if (self::$debug === true) { return false;
self::debugResult($result);
}
return $result;
}
/**
 * Prints a trace entry for the parse method that called it.
 *
 * The entry shows a running counter (incremented on every call), the chain of
 * calling methods, the current position in the data, and an exported copy of
 * what the data's peek() returns at that position.
 */
protected static function debug() {
$message = <<<DEBUG
------------------------------
%s
Method: %s
Position: %s
Char: %s
DEBUG;
$methodTree = '';
$backtrace = debug_backtrace();
// Drop the first frame, which is this call to debug() itself
array_shift($backtrace);
// Drop the last (outermost) frame
// NOTE(review): presumably the external call into parse() -- confirm
array_pop($backtrace);
// Each frame is prepended, so the chain reads from outermost caller to innermost
foreach ($backtrace as $b) {
$methodTree = "->{$b['function']}$methodTree";
}
printf($message,
self::$instance->debugCount++,
// ltrim() takes a character list: this strips the leading '-' and '>' of the first arrow
ltrim($methodTree, '->'),
self::$instance->data->position,
var_export(self::$instance->data->peek(), true)
);
}
/**
 * Prints the name of the calling method followed by an exported copy of the
 * value it is about to return.
 *
 * @param mixed $result The value to print
 */
protected static function debugResult($result) {
    // Frame 1 of the backtrace is the method that invoked debugResult()
    $caller = debug_backtrace()[1]['function'];
    $exported = var_export($result, true);

    printf("%s Result: %s\n", $caller, $exported);
}
protected static function fail(string $expected) {
self::$instance->lastExceptionData = [
'expected' => $expected,
'found' => self::$instance->data->peek()
];
} }
} }

40
lib/Scope/Tokenizer.php

@ -0,0 +1,40 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace dW\Highlighter\Scope;
/**
 * Splits a scope selector string into a flat list of tokens and hands them
 * out one at a time.
 *
 * Three kinds of token are recognised: the prefixes "L:" and "R:", identifiers
 * (a word character, "." or ":" followed by any run of those characters or
 * "-"), and the single-character operators "," "|" "-" "(" ")". Any other
 * input, such as whitespace, is not matched and therefore skipped.
 */
class Tokenizer {
    /** @var list<string> Every token found in the selector, in source order */
    protected array $matches = [];
    /** Index of the token the next call to next() will return */
    protected int $position = 0;
    /** Token most recently returned by next(); false before the first call and once the tokens run out */
    protected string|false $current = false;

    /**
     * @param string $scope The selector string to tokenize
     */
    public function __construct(string $scope) {
        preg_match_all('/([LR]:|[\w\.:][\w\.:\-]*|[\,\|\-\(\)])/', $scope, $matches);
        // Fall back to an empty token list if matching failed and left no capture group
        $this->matches = $matches[1] ?? [];
    }

    /**
     * Advances to the next token and returns it.
     *
     * @return string|false The token, or false when there are no tokens left
     */
    public function next(): string|false {
        $this->current = $this->matches[$this->position] ?? false;
        if ($this->current !== false) {
            $this->position++;
        }

        return $this->current;
    }

    /**
     * Reports whether the token most recently returned by next() is made of
     * identifier characters (word characters, "." or ":").
     *
     * The pattern is unanchored, so the "L:" and "R:" prefixes also qualify;
     * only the operator tokens and the absence of a token yield false.
     */
    public function tokenIsIdentifier(): bool {
        // Compare against false instead of testing truthiness: the token "0"
        // is a valid identifier but is falsy in PHP
        if ($this->current === false) {
            return false;
        }

        return preg_match('/[\w\.:]+/', $this->current) === 1;
    }
}
Loading…
Cancel
Save