Browse Source

Started playing around with a VSCode-style parser

main
Dustin Wilson 3 years ago
parent
commit
38504d13ce
  1. 405
      lib/Scope/Parser.php
  2. 40
      lib/Scope/Tokenizer.php

405
lib/Scope/Parser.php

@ -7,392 +7,113 @@ declare(strict_types=1);
namespace dW\Highlighter\Scope;
class Parser {
public static $debug = false;
protected Data $data;
protected int $debugCount = 1;
protected array $lastExceptionData = [];
protected string $token;
protected Tokenizer $tokenizer;
protected static Parser $instance;
/**
 * Builds the per-parse helper objects from the selector string.
 *
 * Both a Data instance and a Tokenizer instance are created over the same
 * selector. The constructor is protected; within the visible code the only
 * instantiation is `new self($selector)` inside parse().
 *
 * @param string $selector The scope selector text to be parsed.
 */
protected function __construct(string $selector) {
$this->data = new Data($selector);
$this->tokenizer = new Tokenizer($selector);
}
public static function parse(string $selector): Matcher|false {
public static function parse(string $selector): array {
self::$instance = new self($selector);
$result = false;
self::parseSpace();
$s2 = self::parseSelector();
if ($s2 !== false) {
self::parseSpace();
$result = $s2;
}
if ($result === false && self::$instance->lastExceptionData !== []) {
throw new Exception(self::$instance->lastExceptionData['expected'], self::$instance->lastExceptionData['found']);
}
return $result;
}
/**
 * Parses `expression (operator composite)?` where operator is one of `|`, `&`, `-`.
 *
 * First tries the composite form (left expression, operator, recursive right
 * side). When that does not produce a matcher, the input position is rewound
 * to where this call started and a lone expression is parsed instead.
 *
 * @return Matcher|false A CompositeMatcher, whatever parseExpression() returns
 *                       on the fallback path, or false.
 */
protected static function parseComposite(): Matcher|false {
    if (self::$debug === true) {
        self::debug();
    }

    // Remember where we started so a failed composite attempt can be undone.
    $start = self::$instance->data->position;
    $composite = false;

    $left = self::parseExpression();
    if ($left !== false) {
        self::parseSpace();
        $operator = self::$instance->data->consumeIf('|&-');
        if (!in_array($operator, [ '|', '&', '-' ])) {
            self::fail('|&-');
        } else {
            self::parseSpace();
            $right = self::parseComposite();
            if ($right !== false) {
                $composite = new CompositeMatcher($left, $operator, $right);
            }
        }
    }

    // No composite: rewind and retry as a single expression.
    if ($composite === false) {
        self::$instance->data->unconsumeTo($start);
        $composite = self::parseExpression();
    }

    if (self::$debug === true) {
        self::debugResult($composite);
    }

    return $composite;
}
protected static function parseExpression(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$s1 = self::$instance->data->consumeIf('-');
if ($s1 === '-') {
self::parseSpace();
$s3 = self::parseGroup();
if ($s3 !== false) {
self::parseSpace();
$result = new NegateMatcher($s3);
}
} else {
self::fail('-');
}
if ($result === false) {
$s1 = self::$instance->data->consumeIf('-');
if ($s1 === '' || $s1 === false) {
$s1 = false;
self::fail('-');
}
if ($s1 !== false) {
self::parseSpace();
$s3 = self::parsePath();
if ($s3 !== false) {
self::parseSpace();
$result = new NegateMatcher($s3);
$result = [];
while (self::$instance->token = self::$instance->tokenizer->next()) {
$priority = 0;
if (strlen(self::$instance->token) === 2 && self::$instance->token[1] === ':') {
switch (self::$instance->token[0]) {
case 'R': $priority = 1;
break;
case 'L': $priority = -1;
break;
default: die('OOK!');
}
}
if ($result === false) {
$result = self::parseGroup();
if ($result === false) {
$result = self::parsePath();
self::$instance->token = self::$instance->tokenizer->next();
if (self::$instance->token === false) {
break;
}
}
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
}
protected static function parseGroup(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$prefix = null;
$position = self::$instance->data->position;
$s2 = self::$instance->data->consumeIf('LRB');
if ($s2 === '' || $s2 === false) {
$s2 = false;
self::fail('LRB');
}
if ($s2 !== false) {
$s3 = self::$instance->data->consumeIf(':');
if ($s3 === ':') {
$prefix = "$s2$s3";
} else {
self::fail(':');
$matcher = self::parseConjunction();
if ($matcher === false) {
$matcher = self::parseOperand();
}
}
$s2 = self::$instance->data->consumeIf('(');
if ($s2 === '(') {
self::parseSpace();
$s4 = self::parseSelector();
if ($s4 !== false) {
self::parseSpace();
$s6 = self::$instance->data->consumeIf(')');
if ($s6 === '' || $s6 === false) {
$s6 = false;
self::fail(')');
}
if ($s6 !== false) {
$result = new GroupMatcher($prefix, $s4);
}
}
} else {
self::fail('(');
}
if ($result === false) {
self::$instance->data->unconsumeTo($position);
}
$result[] = [
'matcher' => $matcher,
'priority' => $priority
];
if (self::$debug === true) {
self::debugResult($result);
if (self::$instance->token !== ',') {
break;
}
}
return $result;
}
protected static function parsePath(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$prefix = null;
$s2 = self::$instance->data->consumeIf('LRB');
if (in_array($s2, [ 'L', 'R', 'B' ])) {
$s3 = self::$instance->data->consumeIf(':');
if ($s3 === '' || $s3 === false) {
$s3 = false;
self::fail(':');
}
if ($s3 !== false) {
$prefix = "$s2$s3";
}
} else {
self::fail('LRB');
}
$s2 = self::parseScope();
if ($s2 !== false) {
$s3 = [$s2];
do {
$s6 = false;
self::parseSpace();
$s6 = self::parseScope();
if ($s6 !== false) {
$s3[] = $s6;
}
} while ($s6 !== false);
$result = new PathMatcher($prefix, ...$s3);
}
if (self::$debug === true) {
self::debugResult($result);
protected static function parseConjunction(): AndMatcher|false {
$matchers = [];
while ($matcher = self::parseOperand()) {
$matchers[] = $matcher;
}
return $result;
return (count($matchers) > 1) ? new AndMatcher($matchers[0], $matchers[1]) : false;
}
protected static function parseSegment(): SegmentMatcher|TrueMatcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
self::parseSpace();
$s2 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_');
if ($s2 === '' || $s2 === false) {
$s2 = false;
self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_');
}
if ($s2 !== false) {
$s3 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_');
if ($s3 === '' || $s2 === false) {
$s3 = false;
self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_');
protected static function parseInnerExpression(): Matcher|false {
$matchers = [];
while ($matcher = self::parseConjunction()) {
$matchers[] = $matcher;
if (self::$instance->token === '|' || self::$instance->token === ',') {
do {
self::$instance->token = self::$instance->tokenizer->next();
} while (self::$instance->token === '|' || self::$instance->token === ',');
} else {
$s2 .= $s3;
break;
}
}
if ($s2 !== false) {
self::parseSpace();
$result = new SegmentMatcher($s2);
}
if ($result === false) {
self::parseSpace();
$s2 = self::$instance->data->consumeIf('*');
if ($s2 === '*') {
self::parseSpace();
$result = new TrueMatcher($s2);
} else {
self::fail('*');
}
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
return (count($matchers) > 1) ? new OrMatcher($matchers[0], $matchers[1]) : false;
}
protected static function parseSelector(): Matcher|false {
if (self::$debug === true) {
self::debug();
}
$result = false;
$position = self::$instance->data->position;
protected static function parseOperand(): Matcher|false {
if (self::$instance->token === '-') {
self::$instance->token = self::$instance->tokenizer->next();
$s1 = self::parseComposite();
if ($s1 !== false) {
self::parseSpace();
$s3 = self::$instance->data->consumeIf(',');
if ($s3 === ',') {
self::parseSpace();
$s5 = self::parseSelector();
$result = ($s5 === false) ? $s1 : new OrMatcher($s1, $s5);
} else {
self::fail(',');
$matcher = self::parseOperand();
if ($matcher === false) {
die('OH SHIT');
}
}
if ($result === false) {
self::$instance->data->unconsumeTo($position);
$result = self::parseComposite();
return new NegateMatcher($matcher);
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
}
protected static function parseScope(): ScopeMatcher|false {
if (self::$debug === true) {
self::debug();
if (self::$instance->token === '(') {
self::$instance->token = self::$instance->tokenizer->next();
$expressionInParents = self::parseInnerExpression();
if (self::$instance->token === ')') {
self::$instance->token = self::$instance->tokenizer->next();
}
return $expressionInParents;
}
$result = false;
$s1 = self::parseSegment();
if ($s1 !== false) {
$s2 = [$s1];
if (self::$instance->tokenizer->tokenIsIdentifier()) {
$identifiers = [];
do {
$s3 = false;
$s4 = self::$instance->data->consumeIf('.');
if ($s4 === '.') {
$s3 = self::parseSegment();
if ($s3 !== false) {
$s2[] = $s3;
}
} else {
self::fail('.');
}
} while ($s3 !== false);
$result = new ScopeMatcher(...$s2);
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
}
protected static function parseSpace(): string|false {
if (self::$debug === true) {
self::debug();
}
$identifiers[] = self::$instance->token;
self::$instance->token = self::$instance->tokenizer->next();
} while (self::$instance->tokenizer->tokenIsIdentifier());
$result = self::$instance->data->consumeWhile(" \t");
if ($result === false) {
self::fail(" \t");
return new ScopeMatcher(...$identifiers);
}
if (self::$debug === true) {
self::debugResult($result);
}
return $result;
}
/**
 * Prints a trace entry for the parse method that called it.
 *
 * The entry contains a running counter, the chain of calling functions
 * (outermost first, joined with `->`), the current position of the data
 * object and an exported view of the value returned by its peek().
 */
protected static function debug() {
    $template = <<<DEBUG
    ------------------------------
    %s
    Method: %s
    Position: %s
    Char: %s
    DEBUG;

    // Drop the first frame (this call) and the last (outermost) frame, then
    // list the remaining function names from the outermost to the innermost.
    $frames = array_slice(debug_backtrace(), 1, -1);
    $callChain = implode('->', array_reverse(array_column($frames, 'function')));

    printf(
        $template,
        self::$instance->debugCount++,
        $callChain,
        self::$instance->data->position,
        var_export(self::$instance->data->peek(), true)
    );
}
/**
 * Prints the name of the calling function together with an exported
 * representation of the value it is about to return.
 *
 * @param mixed $result The value to display.
 */
protected static function debugResult($result) {
    // Frame 1 of the backtrace is the function that invoked debugResult().
    $caller = debug_backtrace()[1]['function'];
    $exported = var_export($result, true);

    printf("%s Result: %s\n", $caller, $exported);
}
/**
 * Records what was expected and what the data object's peek() currently
 * yields, overwriting any previously recorded failure.
 *
 * @param string $expected The character set that would have been accepted.
 * @return false Always false.
 */
protected static function fail(string $expected) {
    $failure = [
        'expected' => $expected,
        'found' => self::$instance->data->peek()
    ];
    self::$instance->lastExceptionData = $failure;

    return false;
}
}

40
lib/Scope/Tokenizer.php

@ -0,0 +1,40 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace dW\Highlighter\Scope;
/**
 * Splits a scope selector string into a flat list of tokens and hands them
 * out one at a time.
 *
 * A token is one of:
 *  - a priority prefix, `L:` or `R:`;
 *  - an identifier: a run of word characters, dots and colons that may also
 *    contain hyphens after its first character;
 *  - a single operator character: `,` `|` `-` `(` `)`.
 * Anything else in the input (whitespace, for instance) is skipped.
 */
class Tokenizer {
    /** @var list<string> Every token found in the selector, in source order. */
    protected array $matches = [];

    /** Index of the token the next call to next() will return. */
    protected int $position = 0;

    /**
     * The token most recently returned by next(); false before the first call
     * and once the tokens are exhausted.
     */
    protected string|false $current = false;

    /**
     * @param string $scope The scope selector to tokenize.
     */
    public function __construct(string $scope) {
        preg_match_all('/([LR]:|[\w\.:][\w\.:\-]*|[\,\|\-\(\)])/', $scope, $matches);
        $this->matches = $matches[1];
    }

    /**
     * Returns the next token and advances past it.
     *
     * @return string|false The token, or false when no tokens remain. Compare
     *                      the result with `=== false`: the token `'0'` is a
     *                      valid identifier but is falsy in PHP.
     */
    public function next(): string|false {
        $this->current = $this->matches[$this->position] ?? false;
        if ($this->current !== false) {
            $this->position++;
        }

        return $this->current;
    }

    /**
     * Tells whether the token most recently returned by next() is an
     * identifier rather than an operator.
     *
     * The check is made against the token last handed out, not the one that
     * next() would return next; previously the already-advanced position was
     * used, so the answer described the following token. The comparison with
     * false is strict so that the token `'0'` is still recognised.
     *
     * @return bool False when next() has not been called yet, when the tokens
     *              are exhausted, or when the current token is an operator.
     */
    public function tokenIsIdentifier(): bool {
        return $this->current !== false
            && preg_match('/[\w\.:]+/', $this->current) === 1;
    }
}
Loading…
Cancel
Save