From 010c0ddca50ac1bc65957a71baa05068dc5d7ba9 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 18 Jun 2021 16:41:41 -0500 Subject: [PATCH] Playing around with parsing using VSCode-style tokenization --- lib/Scope/Data.php | 68 +++++------------------ lib/Scope/Parser.php | 119 +++++++++++----------------------------- lib/Scope/Tokenizer.php | 40 -------------- 3 files changed, 45 insertions(+), 182 deletions(-) delete mode 100644 lib/Scope/Tokenizer.php diff --git a/lib/Scope/Data.php b/lib/Scope/Data.php index 261e2ce..7427a55 100644 --- a/lib/Scope/Data.php +++ b/lib/Scope/Data.php @@ -9,79 +9,37 @@ namespace dW\Highlighter\Scope; class Data { protected string $data; - protected int $_position = 0; + protected int $position = 0; protected int $endPosition; public function __construct(string $data) { - $this->data = $data; - $this->endPosition = strlen($data); + preg_match('/[LRB]:|[A-Za-z0-9-+_\*\.]+|[\,\|\-\(\)]/', $data, $matches); + $this->data = $matches[0] ?? []; + $this->endPosition = count($this->data); } - public function consume(int $length = 1): string|bool { - if ($this->_position === $this->endPosition) { + public function consume(): string|bool { + if ($this->position === $this->endPosition) { return false; } - $stop = $this->_position + $length - 1; - if ($stop > $this->endPosition) { - $stop = $this->endPosition; - } - - $result = ''; - while ($this->_position <= $stop) { - $result .= $this->data[$this->_position++]; - } - - return $result; - } - - public function consumeIf(string $match): string|bool { - return $this->consumeWhile($match, 1); - } - - public function consumeWhile(string $match, $limit = null): string|bool { - if ($this->_position === $this->endPosition) { - return false; - } - - $length = strspn($this->data, $match, $this->_position, $limit); - if ($length === 0) { - return ''; - } - - return $this->consume($length); + return $this->data[$this->position++]; } - public function peek(int $length = 1): string|bool { - if ($this->_position === $this->endPosition) { + public function peek(): string|bool { + if ($this->position === $this->endPosition) { return false; } - $stop = $this->_position + $length - 1; - if ($stop >= $this->endPosition) { - $stop = $this->endPosition; - } - - $output = ''; - for ($i = $this->_position; $i <= $stop; $i++) { - $output .= $this->data[$i]; - } - - return $output; + return $this->data[$this->position + 1]; } - public function unconsumeTo(int $position = 1): bool { - if ($position < 0 || $position > $this->endPosition) { + public function unconsume(): bool { + if ($this->position < 0) { return false; } - $this->_position = $position; + $this->position--; return true; } - - public function __get(string $name) { - if ($name === 'position') { - return $this->_position; - } - } } diff --git a/lib/Scope/Parser.php b/lib/Scope/Parser.php index 84b6fff..0d0a04f 100644 --- a/lib/Scope/Parser.php +++ b/lib/Scope/Parser.php @@ -7,113 +7,58 @@ declare(strict_types=1); namespace dW\Highlighter\Scope; class Parser { + protected Data $data; protected string $token; - protected Tokenizer $tokenizer; protected static Parser $instance; + protected const PREFIX_REGEX = '/^[LRB]:$/S'; + protected const SCOPE_REGEX = '/^[A-Za-z0-9-+_\.]+$/S'; - protected function __construct(string $selector) { - $this->tokenizer = new Tokenizer($selector); - } - - public static function parse(string $selector): array { + public static function parse(string $selector): Matcher|false { self::$instance = new self($selector); - - $result = []; - while (self::$instance->token = self::$instance->tokenizer->next()) { - $priority = 0; - if (strlen(self::$instance->token) === 2 && self::$instance->token[1] === ':') { - switch (self::$instance->token[0]) { - case 'R': $priority = 1; - break; - case 'L': $priority = -1; - break; - default: die('OOK!'); - } - - self::$instance->token = self::$instance->tokenizer->next(); - if (self::$instance->token === false) { - break; - } - } - - $matcher = self::parseConjunction(); - if ($matcher === false) { - $matcher = self::parseOperand(); - } - - $result[] = [ - 'matcher' => $matcher, - 'priority' => $priority - ]; - - if (self::$instance->token !== ',') { - break; - } - } - - return $result; + return self::parseSelector(); } - protected static function parseConjunction(): AndMatcher|false { - $matchers = []; - while ($matcher = self::parseOperand()) { - $matchers[] = $matcher; - } - - return (count($matchers) > 1) ? new AndMatcher($matchers[0], $matchers[1]) : false; + protected function __construct(string $selector) { + $this->data = new Data($selector); } - protected static function parseInnerExpression(): Matcher|false { - $matchers = []; - while ($matcher = self::parseConjunction()) { - $matchers[] = $matcher; - if (self::$instance->token === '|' || self::$instance->token === ',') { - do { - self::$instance->token = self::$instance->tokenizer->next(); - } while (self::$instance->token === '|' || self::$instance->token === ','); + protected static function parseSelector(): Matcher { + while (self::$instance->token = self::$instance->data->consume()) { + if (preg_match(self::PREFIX_REGEX, self::$instance->token)) { + $peek = self::$instance->data->peek(); + if ($peek === '(') { + $result = self::parseGroup(); + } elseif (preg_match(self::SCOPE_REGEX, self::$instance->token)) { + $result = self::parsePath(); + } else { + die('Group or path expected.'); + } + } elseif (preg_match(self::SCOPE_REGEX, self::$instance->token)) { + $result = self::parseScope(); + } elseif (self::$instance->token === '(') { + continue; } else { - break; + die('Group, path, or scope expected.'); } - } - - return (count($matchers) > 1) ? new OrMatcher($matchers[0], $matchers[1]) : false; - } - protected static function parseOperand(): Matcher|false { - if (self::$instance->token === '-') { - self::$instance->token = self::$instance->tokenizer->next(); - - $matcher = self::parseOperand(); - if ($matcher === false) { - die('OH SHIT'); - } - - return new NegateMatcher($matcher); + return $result; } + } - if (self::$instance->token === '(') { - self::$instance->token = self::$instance->tokenizer->next(); - $expressionInParents = self::parseInnerExpression(); - if (self::$instance->token === ')') { - self::$instance->token = self::$instance->tokenizer->next(); - } - return $expressionInParents; + protected static function parseScope(): Matcher { + if (!preg_match('/^(?:[A-Za-z0-9-_]+|\*)(?:\.(?:[A-Za-z0-9-+_]+|\*))*$/S', self::$instance->token)) { + die('Invalid scope'); } - if (self::$instance->tokenizer->tokenIsIdentifier()) { - $identifiers = []; - do { - $identifiers[] = self::$instance->token; - self::$instance->token = self::$instance->tokenizer->next(); - } while (self::$instance->tokenizer->tokenIsIdentifier()); - - return new ScopeMatcher(...$identifiers); + $segments = explode('.', $token); + foreach ($segments as $index => $segment) { + $segments[$index] = ($segment !== '*') ? new SegmentMatcher($segment) : new TrueMatcher(); } - return false; + return new ScopeMatcher(...$segments); } } diff --git a/lib/Scope/Tokenizer.php b/lib/Scope/Tokenizer.php deleted file mode 100644 index b5c729e..0000000 --- a/lib/Scope/Tokenizer.php +++ /dev/null @@ -1,40 +0,0 @@ -matches = $matches[1]; - } - - - public function next(): string|false { - if (count($this->matches) === 0) { - return false; - } - - $result = $this->matches[$this->position] ?? false; - - if ($result !== false) { - $this->position++; - } - - return $result; - } - - public function tokenIsIdentifier(): bool { - if (!isset($this->matches[$this->position])) { - return false; - } - - return (!!$this->matches[$this->position] && !!preg_match('/[\w\.:]+/', $this->matches[$this->position])); - } -} \ No newline at end of file