From 1f72a86207630024216d255575b056d5dbdfae1d Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 7 Jun 2021 00:03:16 -0500 Subject: [PATCH] Working out scope parsing --- .gitignore | 1 + composer.json | 3 +- lib/Scope/Data.php | 44 +- lib/Scope/Exception.php | 13 +- lib/Scope/Matcher/PathMatcher.php | 11 - lib/Scope/Matcher/ScopeMatcher.php | 11 - .../CompositeMatcher.php | 4 +- .../{Matcher => Matchers}/GroupMatcher.php | 4 +- .../{Matcher => Matchers}/NegateMatcher.php | 4 +- lib/Scope/{Matcher => Matchers}/OrMatcher.php | 4 +- lib/Scope/Matchers/PathMatcher.php | 17 + lib/Scope/Matchers/ScopeMatcher.php | 15 + lib/Scope/Matchers/SegmentMatcher.php | 15 + lib/Scope/Matchers/TrueMatcher.php | 15 + lib/Scope/Parser.php | 378 +++++++++++++----- 15 files changed, 378 insertions(+), 161 deletions(-) delete mode 100644 lib/Scope/Matcher/PathMatcher.php delete mode 100644 lib/Scope/Matcher/ScopeMatcher.php rename lib/Scope/{Matcher => Matchers}/CompositeMatcher.php (68%) rename lib/Scope/{Matcher => Matchers}/GroupMatcher.php (68%) rename lib/Scope/{Matcher => Matchers}/NegateMatcher.php (66%) rename lib/Scope/{Matcher => Matchers}/OrMatcher.php (68%) create mode 100644 lib/Scope/Matchers/PathMatcher.php create mode 100644 lib/Scope/Matchers/ScopeMatcher.php create mode 100644 lib/Scope/Matchers/SegmentMatcher.php create mode 100644 lib/Scope/Matchers/TrueMatcher.php diff --git a/.gitignore b/.gitignore index 927bf50..9dbad95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Temporary files and dependencies test*.php +*-old.php /vendor/ diff --git a/composer.json b/composer.json index 6867a62..75258a5 100644 --- a/composer.json +++ b/composer.json @@ -19,7 +19,8 @@ }, "autoload": { "psr-4": { - "dW\\Highlighter\\": "lib/" + "dW\\Highlighter\\": "lib/", + "dW\\Highlighter\\Scope\\": "lib/Scope/Matchers/" } } } diff --git a/lib/Scope/Data.php b/lib/Scope/Data.php index 245c84c..186c7ed 100644 --- a/lib/Scope/Data.php +++ b/lib/Scope/Data.php @@ -9,7 +9,7 @@ namespace dW\Highlighter\Scope; class Data { protected string $data; - protected int $position = 0; + protected int $_position = 0; protected int $endPosition; public function __construct(string $data) { @@ -18,21 +18,21 @@ class Data { } public function consume(int $length = 1): string|bool { - if ($this->position === $this->endPosition) { + if ($this->_position === $this->endPosition) { return false; } - $stop = $this->position + $length; - if ($stop >= $this->endPosition) { + $stop = $this->_position + $length - 1; + if ($stop > $this->endPosition) { $stop = $this->endPosition; } - $output = ''; - for ($i = $this->position; $i <= $stop; $i++) { - $output .= $this->data[$this->position++]; + $result = ''; + while ($this->_position <= $stop) { + $result .= $this->data[$this->_position++]; } - return $output; + return $result; } public function consumeIf(string $match): string|bool { @@ -40,11 +40,11 @@ class Data { } public function consumeUntil(string $match, $limit = null): string|bool { - if ($this->position === $this->endPosition) { + if ($this->_position === $this->endPosition) { return false; } - $length = strcspn($this->data, $match, $this->position + 1, $limit); + $length = strcspn($this->data, $match, $this->_position, $limit); if ($length === 0) { return ''; } @@ -53,11 +53,11 @@ class Data { } public function consumeWhile(string $match, $limit = null): string|bool { - if ($this->position === $this->endPosition) { + if ($this->_position === $this->endPosition) { return false; } - $length = strspn($this->data, $match, $this->position + 1, $limit); + $length = strspn($this->data, $match, $this->_position, $limit); if ($length === 0) { return ''; } @@ -65,29 +65,27 @@ class Data { return $this->consume($length); } - public function current(): string|bool { - if ($this->position === $this->endPosition) { - return false; - } - - return $this->data[$this->position]; - } - public function peek(int $length = 1): string|bool { - if ($this->position === $this->endPosition) { + if ($this->_position === $this->endPosition) { return false; } - $stop = $this->position + $length; + $stop = $this->_position + $length - 1; if ($stop >= $this->endPosition) { $stop = $this->endPosition; } $output = ''; - for ($i = $this->position; $i <= $stop; $i++) { + for ($i = $this->_position; $i <= $stop; $i++) { $output .= $this->data[$i]; } return $output; } + + public function __get(string $name) { + if ($name === 'position') { + return $this->_position; + } + } } diff --git a/lib/Scope/Exception.php b/lib/Scope/Exception.php index 23690a1..1201220 100644 --- a/lib/Scope/Exception.php +++ b/lib/Scope/Exception.php @@ -9,11 +9,14 @@ namespace dW\Highlighter\Scope; class Exception extends \Exception { const MESSAGE = '%s expected; found %s'; - public function __construct(array|string $expected, string $found) { - if (is_array($expected)) { - $expected = array_map(function($n) { - return ($n !== false) ? "\"$n\"" : 'end of input'; - }, $expected); + public function __construct(string $expected, string $found) { + $strlen = strlen($expected); + if ($strlen > 1) { + $temp = []; + for ($i = 0; $i < $strlen; $i++) { + $temp[] = ($expected[$i] !== false) ? "\"{$expected[$i]}\"" : 'end of input'; + } + $expected = $temp; if (count($expected) > 2) { $last = array_pop($expected); diff --git a/lib/Scope/Matcher/PathMatcher.php b/lib/Scope/Matcher/PathMatcher.php deleted file mode 100644 index 159058b..0000000 --- a/lib/Scope/Matcher/PathMatcher.php +++ /dev/null @@ -1,11 +0,0 @@ -prefix = $prefix; + $this->matchers = $matchers; + } +} diff --git a/lib/Scope/Matchers/ScopeMatcher.php b/lib/Scope/Matchers/ScopeMatcher.php new file mode 100644 index 0000000..5bf614c --- /dev/null +++ b/lib/Scope/Matchers/ScopeMatcher.php @@ -0,0 +1,15 @@ +segments = $matchers; + } +} diff --git a/lib/Scope/Matchers/SegmentMatcher.php b/lib/Scope/Matchers/SegmentMatcher.php new file mode 100644 index 0000000..f043d8b --- /dev/null +++ b/lib/Scope/Matchers/SegmentMatcher.php @@ -0,0 +1,15 @@ +segment = $segment; + } +} diff --git a/lib/Scope/Matchers/TrueMatcher.php b/lib/Scope/Matchers/TrueMatcher.php new file mode 100644 index 0000000..dd34b04 --- /dev/null +++ b/lib/Scope/Matchers/TrueMatcher.php @@ -0,0 +1,15 @@ +scopeName = $scopeName; + } +} diff --git a/lib/Scope/Parser.php b/lib/Scope/Parser.php index b96efd5..08b7416 100644 --- a/lib/Scope/Parser.php +++ b/lib/Scope/Parser.php @@ -7,15 +7,20 @@ declare(strict_types=1); namespace dW\Highlighter\Scope; class Parser { + public static $debug = false; + protected Data $data; protected array $lastExceptionData = []; + protected static $debugCount = 1; protected static Parser $instance; + protected function __construct(string $selector) { $this->data = new Data($selector); } + public static function parse(string $selector): Matcher|false { self::$instance = new self($selector); @@ -31,21 +36,22 @@ class Parser { } } - if (self::$instance->lastExceptionData !== []) { + if (self::$debug === true) { + echo "------------------------------\n"; + } + + if ($result === false && self::$instance->lastExceptionData !== []) { throw new Exception(self::$instance->lastExceptionData['expected'], self::$instance->lastExceptionData['found']); } return $result; } - protected static function fail(array|string $expected) { - self::$instance->lastExceptionData = [ - 'expected' => $expected, - 'found' => self::$instance->data->peek() - ]; - } - protected static function parseComposite(): Matcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; $s1 = self::parseExpression(); @@ -53,14 +59,17 @@ class Parser { $s2 = self::parseSpace(); if ($s2 !== false) { $s3 = self::$instance->data->consumeIf('|&-'); - if (!in_array($s3, [ '|', '&', '-' ])) { - self::fail([ '|', '&', '-' ]); - } else { + if ($s3 === '' || $s3 === false) { + $s3 = false; + self::fail('|&-'); + } + + if ($s3 !== false) { $s4 = self::parseSpace(); if ($s4 !== false) { $s5 = self::parseComposite(); if ($s5 !== false) { - $result = new Matcher\CompositeMatcher($s1, $s3, $s5); + $result = new CompositeMatcher($s1, $s3, $s5); } } } @@ -71,84 +80,122 @@ class Parser { $result = self::parseExpression(); } + if (self::$debug === true) { + echo "parseComposite Result: " . var_export($result, true) . "\n"; + } + return $result; } protected static function parseExpression(): Matcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; + $s1 = self::$instance->data->consumeIf('-'); - if ($s1 !== '-') { + if ($s1 === '' || $s1 === false) { + $s1 = false; self::fail('-'); } - $s2 = self::parseSpace(); - if ($s2 !== false) { - $s3 = self::parseGroup(); - if ($s3 !== false) { - $s4 = self::parseSpace(); - if ($s4 !== false) { - $result = new Matcher\NegateMatcher($s3); + if ($s1 !== false) { + $s2 = self::parseSpace(); + if ($s2 !== false) { + $s3 = self::parseGroup(); + if ($s3 !== false) { + $s4 = self::parseSpace(); + if ($s4 !== false) { + $result = new NegateMatcher($s3); + } } } } if ($result === false) { - $s1 = self::$instance->data->consumeIf('-', 1); - if ($s1 !== '-') { + $s1 = self::$instance->data->consumeIf('-'); + if ($s1 === '' || $s1 === false) { + $s1 = false; self::fail('-'); - } else { + } + + if ($s1 !== false) { $s2 = self::parseSpace(); if ($s2 !== false) { $s3 = self::parsePath(); if ($s3 !== false) { $s4 = self::parseSpace(); if ($s4 !== false) { - $result = new Matcher\NegateMatcher($s3); + $result = new NegateMatcher($s3); } } } } - } - if ($result === false) { - $result = self::parseGroup(); if ($result === false) { - $result = self::parsePath(); + $result = self::parseGroup(); + if ($result === false) { + $result = self::parsePath(); + } } } + if (self::$debug === true) { + echo "parseExpression Result: " . var_export($result, true) . "\n"; + } + return $result; } protected static function parseGroup(): Matcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; + $prefix = null; - $s2 = self::$instance->data->consumeIf('BLR'); - if (!in_array($s2, [ 'B', 'L', 'R' ])) { - self::fail([ 'B', 'L', 'R' ]); - } else { + $s2 = self::$instance->data->consumeIf('LRB'); + if ($s2 === '' || $s2 === false) { + $s2 = false; + self::fail('LRB'); + } + + if ($s2 !== false) { $s3 = self::$instance->data->consumeIf(':'); - if ($s3 !== ':') { + if ($s3 === '' || $s3 === false) { + $s3 = false; self::fail(':'); - } else { + } + + if ($s3 !== false) { $prefix = "$s2$s3"; + } + } - $s2 = self::$instance->data->consumeIf('('); - if ($s2 !== '(') { - self::fail('('); - } else { - $s3 = self::parseSpace(); - if ($s3 !== false) { - $s4 = self::parseSelector(); - if ($s4 !== false) { - $s5 = self::parseSpace(); - if ($s5 !== false) { - $s6 = self::$instance->data->consumeIf(')'); - if ($s6 !== ')') { - self::fail(')'); - } else { - $result = new GroupMatcher($prefix, $s4); - } + if ($prefix !== null) { + $s2 = self::$instance->data->consumeIf('('); + if ($s2 === '' || $s2 === false) { + $s2 = false; + self::fail('('); + } + + if ($s2 !== false) { + $s3 = self::parse(); + if ($s3 !== false) { + $s4 = self::parseSelector(); + if ($s4 !== false) { + $s5 = self::parseSpace(); + if ($s5 !== false) { + $s6 = self::$instance->data->consumeIf(')'); + if ($s6 === '' || $s6 === false) { + $s6 = false; + self::fail(')'); + } + + if ($s6 !== false) { + $result = new GroupMatcher($prefix, $s4); } } } @@ -156,114 +203,241 @@ class Parser { } } + if (self::$debug === true) { + echo "parseGroup Result: " . var_export($result, true) . "\n"; + } + return $result; } protected static function parsePath(): Matcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; + $prefix = null; + + $s2 = self::$instance->data->consumeIf('LRB'); + if ($s2 === '' || $s2 === false) { + $s2 = false; + self::fail('LRB'); + } - $s2 = self::$instance->data->consumeIf('BLR'); - if (!in_array($s2, [ 'B', 'L', 'R' ])) { - self::fail([ 'B', 'L', 'R' ]); - } else { + if ($s2 !== false) { $s3 = self::$instance->data->consumeIf(':'); - if ($s3 !== ':') { + if ($s3 === '' || $s3 === false) { + $s3 = false; self::fail(':'); - } else { + } + + if ($s3 !== false) { $prefix = "$s2$s3"; + } + } - $s2 = self::parseScope(); - if ($s2 !== false) { - $s3 = ''; - $s4 = ''; + $s2 = self::parseScope(); + if ($s2 !== false) { + $s3 = [$s2]; + + do { + $s4 = false; + $s5 = self::parseSpace(); + if ($s5 !== false) { + $s6 = self::parseScope(); + if ($s6 !== false) { + $s3[] = $s6; + } + } + } while ($s4 !== false); - while ($s4 !== false) { - $s3 .= $s4; - $s4 = false; + $result = new PathMatcher($prefix, ...$s3); + } - $s5 = self::parseSpace(); - if ($s5 !== false) { - $s6 = self::parseScope(); - if ($s6 !== false) { - $s4 = "$s5$s6"; - } - } + if (self::$debug === true) { + echo "parsePath Result: " . var_export($result, true) . "\n"; + } + + return $result; + } + + protected static function parseSegment(): SegmentMatcher|TrueMatcher|false { + if (self::$debug === true) { + self::debug(); + } + + $result = false; + + $s1 = self::parseSpace(); + if ($s1 !== false) { + $s2 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_'); + if ($s2 === '' || $s2 === false) { + $s2 = false; + self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_'); + } + + if ($s2 !== false) { + $s3 = self::$instance->data->consumeWhile('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_'); + if ($s3 === '' || $s2 === false) { + $s3 = false; + self::fail('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_'); + } else { + $s2 .= $s3; + } + } + + if ($s2 !== false) { + $s3 = self::parseSpace(); + if ($s3 !== false) { + $result = new SegmentMatcher($s2); + } + } + + if ($result === false) { + $s1 = self::parseSpace(); + if ($s1 !== false) { + $s2 = self::$instance->data->consumeIf('*'); + if ($s2 === '' || $s2 === false) { + $s2 = false; + self::fail('*'); } - if (strlen($s3) > 0) { - $result = new Matcher\PathMatcher($prefix, $s2, $s3); + if ($s2 !== false) { + $s3 = self::parseSpace(); + if ($s3 !== false) { + $result = new TrueMatcher($s2); + } } } } } - return $result; - } - - protected static function parseSpace(): string|false { - return self::$instance->data->consumeIf(" \t"); - } + if (self::$debug === true) { + echo "parseSegment Result: " . var_export($result, true) . "\n"; + } - protected static function parseSegment(): string|false { - return false; + return $result; } protected static function parseSelector(): Matcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; + $s1 = self::parseComposite(); if ($s1 !== false) { $s2 = self::parseSpace(); if ($s2 !== false) { $s3 = self::$instance->data->consumeIf(','); - if ($s3 !== ',') { + if ($s3 === '' || $s3 === false) { + $s3 = false; self::fail(','); - } else { + } + + if ($s3 !== false) { $s4 = self::parseSpace(); if ($s4 !== false) { $s5 = self::parseSelector(); - if ($s5 !== false) { - $result = new Matcher\OrMatcher($s1, $s5); - } + $result = ($s5 === false) ? $s1 : new OrMatcher($s1, $s5); } } } } - if ($result === false) { - $result = self::parseComposite(); + if (self::$debug === true) { + echo "parseSelector Result: " . var_export($result, true) . "\n"; } return $result; } - protected static function parseScope(): string|false { + protected static function parseScope(): ScopeMatcher|false { + if (self::$debug === true) { + self::debug(); + } + $result = false; $s1 = self::parseSegment(); if ($s1 !== false) { - $s2 = ''; - $s3 = ''; - - while ($s3 !== false) { - $s2 .= $s3; + $s2 = [$s1]; + do { $s3 = false; $s4 = self::$instance->data->consumeIf('.'); - if ($s4 !== '.') { + if ($s4 === '' || $s4 === false) { + $s4 = false; self::fail('.'); - } else { - $s5 = self::parseSegment(); - if ($s5 !== false) { - $s3 = "$s4$s5"; + } + + if ($s4 !== false) { + $s3 = self::parseSegment(); + if ($s3 !== false) { + $s2[] = $s3; } } - } + } while ($s3 !== false); - if (strlen($s2) > 0) { - $result = new Matcher\ScopeMatcher($s1, $s2); - } + $result = new ScopeMatcher(...$s2); + } + + if (self::$debug === true) { + echo "parseScope Result: " . var_export($result, true) . "\n"; } return $result; } + + protected static function parseSpace(): string|false { + if (self::$debug === true) { + self::debug(); + } + + $result = self::$instance->data->consumeWhile(" \t"); + if ($result === false) { + self::fail(' \t'); + } + + if (self::$debug === true) { + echo "parseSpace Result: " . var_export($result, true) . "\n"; + } + + return $result; + } + + + protected static function debug() { + $message = <<'), + self::$instance->data->position, + self::$instance->data->peek() + ); + } + + protected static function fail(string $expected) { + self::$instance->lastExceptionData = [ + 'expected' => $expected, + 'found' => self::$instance->data->peek() + ]; + } }