diff --git a/lib/Grammar.php b/lib/Grammar.php index 2691e57..3518e51 100644 --- a/lib/Grammar.php +++ b/lib/Grammar.php @@ -7,17 +7,12 @@ declare(strict_types=1); namespace dW\Lit; use dW\Lit\Grammar\{ BaseReference, - CaptureList, ChildGrammarRegistry, Exception, FauxReadOnly, GrammarReference, - InjectionList, - Node, Pattern, - PatternList, Reference, - Repository, RepositoryReference, SelfReference }; @@ -31,15 +26,15 @@ class Grammar { use FauxReadOnly; protected ?string $_contentName; protected ?string $_firstLineMatch; - protected ?InjectionList $_injections; + protected ?array $_injections; protected ?string $_name; protected ?\WeakReference $_ownerGrammar; - protected ?PatternList $_patterns; - protected ?Repository $_repository; + protected ?array $_patterns; + protected ?array $_repository; protected ?string $_scopeName; - public function __construct(?string $scopeName = null, ?PatternList $patterns = null, ?string $name = null, ?string $firstLineMatch = null, ?InjectionList $injections = null, ?Repository $repository = null, ?Grammar $ownerGrammar = null) { + public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?string $firstLineMatch = null, ?array $injections = null, ?array $repository = null, ?Grammar $ownerGrammar = null) { $this->_name = $name; $this->_scopeName = $scopeName; $this->_patterns = $patterns; @@ -57,19 +52,24 @@ class Grammar { $new = clone $this; if ($new->_patterns !== null) { - $new->_patterns = $new->_patterns->withOwnerGrammar($new); + foreach ($new->_patterns as &$p) { + $p = $p->withOwnerGrammar($new); + } } if ($new->_injections !== null) { - $new->_injections = $new->_injections->withOwnerGrammar($new); + foreach ($new->_injections as &$i) { + $i = $i->withOwnerGrammar($new); + } } if ($new->_repository !== null) { - $new->_repository = $new->_repository->withOwnerGrammar($new); + foreach ($new->_repository as &$r) { + $r = $r->withOwnerGrammar($new); + } } $new->_ownerGrammar 
= \WeakReference::create($ownerGrammar); - ChildGrammarRegistry::set($this->_scopeName, $new); return $new; } @@ -110,12 +110,7 @@ class Grammar { foreach ($json['repository'] as $key => $r) { $repository[$key] = $this->parseJSONPattern($r, $filename); } - - if (count($repository) > 0) { - $repository = new Repository($repository); - } else { - $repository = null; - } + $repository = (count($repository) > 0) ? $repository : null; } $this->_repository = $repository; @@ -127,12 +122,7 @@ class Grammar { foreach ($json['injections'] as $key => $injection) { $injections[$key] = $this->parseJSONPattern($injection, $filename); } - - if (count($injections) > 0) { - $injections = new InjectionList($injections); - } else { - $injections = null; - } + $injections = (count($injections) > 0) ? $injections : null; } $this->_injections = $injections; } @@ -154,6 +144,7 @@ class Grammar { $p = [ 'ownerGrammar' => $this, 'name' => null, + 'contentName' => null, 'match' => null, 'patterns' => null, 'captures' => null, @@ -173,8 +164,9 @@ class Grammar { } // Begin and end matches are handled in this implementation by parsing begin - // matches as regular matches and appending the end match as a pattern to the - // end of the pattern's patterns. + // matches as regular matches and appending the end match as a pattern + // to the pattern's patterns with an end pattern flag turned on + // which is used to exit matching. 
if (isset($pattern['begin'])) { if (!isset($pattern['end'])) { throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end'); @@ -244,7 +236,7 @@ class Grammar { return $this->parseJSONPattern($n, $filename); }, array_values($value)); - $p[$key] = new CaptureList(array_combine($k, $v)); + $p[$key] = array_combine($k, $v); $modified = true; break; case 'match': @@ -252,6 +244,7 @@ class Grammar { $p['match'] = "/$value/u"; $modified = true; break; + case 'contentName': case 'name': $p[$key] = $value; $modified = true; @@ -270,7 +263,7 @@ class Grammar { return ($modified) ? new Pattern(...$p) : null; } - protected function parseJSONPatternList(array $list, string $filename): ?PatternList { + protected function parseJSONPatternList(array $list, string $filename): ?array { $result = []; foreach ($list as $pattern) { $p = $this->parseJSONPattern($pattern, $filename); @@ -279,6 +272,6 @@ class Grammar { } } - return (count($result) > 0) ? new PatternList(...$result) : null; + return (count($result) > 0) ? 
$result : null; } } \ No newline at end of file diff --git a/lib/Grammar/CaptureList.php b/lib/Grammar/CaptureList.php deleted file mode 100644 index 20791ae..0000000 --- a/lib/Grammar/CaptureList.php +++ /dev/null @@ -1,29 +0,0 @@ - $v) { - if (!is_int($k)) { - throw new Exception(Exception::LIST_INVALID_TYPE, 'Integer', 'supplied array index', gettype($k)); - } - - if (!$v instanceof Pattern && !$v instanceof PatternList && !$v instanceof Reference) { - $type = gettype($v); - if ($type === 'object') { - $type = get_class($v); - } - - throw new Exception(Exception::LIST_INVALID_TYPE, __NAMESPACE__.'\Pattern, '.__NAMESPACE__.'\PatternList, '.__NAMESPACE__.'\Reference', 'supplied array value', $type); - } - } - - $this->storage = $array; - } -} \ No newline at end of file diff --git a/lib/Grammar/ImmutableList.php b/lib/Grammar/ImmutableList.php deleted file mode 100644 index 9cbeb8a..0000000 --- a/lib/Grammar/ImmutableList.php +++ /dev/null @@ -1,83 +0,0 @@ -storage = $values; - $this->count = count($this->storage); - } - - // Used when adopting to change the $ownerGrammar property of items in the - // list. 
- public function withOwnerGrammar(Grammar $ownerGrammar): self { - $new = clone $this; - foreach ($new->storage as &$s) { - $s = $s->withOwnerGrammar($ownerGrammar); - } - - return $new; - } - - - public function count(): int { - return $this->count; - } - - public function current() { - return current($this->storage); - } - - public function getIterator(): array { - return $this->storage; - } - - public function key(){ - $this->position = key($this->storage); - return $this->position; - } - - public function next() { - next($this->storage); - $this->position = key($this->storage); - } - - public function offsetExists($offset) { - return isset($this->storage[$offset]); - } - - public function offsetGet($offset) { - if (!isset($this->storage[$offset])) { - throw new Exception(Exception::LIST_INVALID_INDEX, __CLASS__, $offset); - } - - return $this->storage[$offset]; - } - - public function offsetSet($offset, $value) { - throw new Exception(Exception::LIST_IMMUTABLE, __CLASS__); - } - - public function offsetUnset($offset) { - throw new Exception(Exception::LIST_IMMUTABLE, __CLASS__); - } - - public function rewind() { - reset($this->storage); - $this->position = key($this->storage); - } - - public function valid() { - return $this->offsetExists($this->position); - } -} diff --git a/lib/Grammar/InjectionList.php b/lib/Grammar/InjectionList.php deleted file mode 100644 index 2cc28ba..0000000 --- a/lib/Grammar/InjectionList.php +++ /dev/null @@ -1,14 +0,0 @@ -_name = $name; $this->_contentName = $contentName; $this->_match = $match; diff --git a/lib/Grammar/Repository.php b/lib/Grammar/Repository.php deleted file mode 100644 index 29a96da..0000000 --- a/lib/Grammar/Repository.php +++ /dev/null @@ -1,13 +0,0 @@ -tokenize(); foreach ($tokenList as $lineNumber => $tokens) { - if ($lineNumber === 2) { - die(var_export($tokens)); - } + die(var_export($tokens)); //echo "$lineNumber: $line\n"; } } diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index 51a956e..d9e068a 
100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -43,25 +43,31 @@ class Tokenizer { substr($line, $this->offset, $lineLength) ); } - + yield $lineNumber => $tokens; } } - protected function getMatch(string $regex, string $line): ?array { - if (preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE, $this->offset) !== 1) { - return null; - } - - return $match; + protected function resolveScopeName(string $scopeName, array $match): string { + return preg_replace_callback('/\$(\d+)|\${(\d+):\/(downcase|upcase)}/', function($m) use ($match) { + $replacement = $match[(int)$m[1]][0] ?? $m[1]; + $command = $m[2] ?? null; + switch ($command) { + case 'downcase': return strtolower($replacement); + break; + case 'upcase': return strtoupper($replacement); + break; + default: return $replacement; + } + }, $scopeName); } protected function tokenizeLine(string $line): array { $tokens = []; while (true) { - $currentRules = end($this->ruleStack)->patterns->getIterator(); + $currentRules = end($this->ruleStack)->patterns; $currentRulesCount = count($currentRules); for ($i = 0; $i < $currentRulesCount; $i++) { @@ -69,14 +75,14 @@ class Tokenizer { $rule = $currentRules[$i]; // If the rule is a Pattern and matches the line at the offset then tokenize the // matches. - if ($rule instanceof Pattern && $match = $this->getMatch($rule->match, $line)) { + if ($rule instanceof Pattern && preg_match($rule->match, $line, $match, PREG_OFFSET_CAPTURE, $this->offset)) { // Add the name and contentName to the scope stack // if present. 
if ($rule->name !== null) { - $this->scopeStack[] = $rule->name; + $this->scopeStack[] = $this->resolveScopeName($rule->name, $match); } if ($rule->contentName !== null) { - $this->scopeStack[] = $rule->contentName; + $this->scopeStack[] = $this->resolveScopeName($rule->contentName, $match); } $wholeMatchCaptureScopeCount = 0; @@ -121,11 +127,11 @@ class Tokenizer { } if ($rule->captures[0]->name !== null) { - $this->scopeStack[] = $rule->captures[0]->name; + $this->scopeStack[] = $this->resolveScopeName($rule->captures[0]->name, $match); $wholeMatchCaptureScopeCount++; } if ($rule->captures[0]->contentName !== null) { - $this->scopeStack[] = $rule->captures[0]->contentName; + $this->scopeStack[] = $this->resolveScopeName($rule->captures[0]->contentName, $match); $wholeMatchCaptureScopeCount++; } } @@ -138,11 +144,11 @@ class Tokenizer { // The scope stack for the whole match is handled above, so only handle that for // other captures. if ($k !== 0) { - if ($rule->captures->name !== null) { - $this->scopeStack[] = $rule->captures[$k]->name; + if ($rule->captures[$k]->name !== null) { + $this->scopeStack[] = $this->resolveScopeName($rule->captures[$k]->name, $match); } - if ($rule->captures->contentName !== null) { - $this->scopeStack[] = $rule->captures[$k]->contentName; + if ($rule->captures[$k]->contentName !== null) { + $this->scopeStack[] = $this->resolveScopeName($rule->captures[$k]->contentName, $match); } } @@ -162,7 +168,7 @@ class Tokenizer { array_pop($this->ruleStack); } else { $tokens[] = new Token( - [ ...$this->scopeStack, $rule->captures[$k]->name ], + [ ...$this->scopeStack, $this->resolveScopeName($rule->captures[$k]->name, $match) ], $m[0] ); } @@ -200,10 +206,8 @@ class Tokenizer { // Otherwise, if the rule is a Reference then retrieve its patterns, splice into // the rule list, and reprocess the rule. 
elseif ($rule instanceof Reference && $obj = $rule->get()) { - if ($obj instanceof PatternList) { - $obj = $obj->getIterator(); - } elseif ($obj instanceof Grammar) { - $obj = $obj->patterns->getIterator(); + if ($obj instanceof Grammar) { + $obj = $obj->patterns; } array_splice($currentRules, $i, 1, $obj);