From 4f09139e3bfe670035ea92e2f710e35a1c7ce388 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 16 Aug 2021 16:36:32 -0500 Subject: [PATCH] Various fixes, tokenization is however now an infinite loop :/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Added preliminary transformation of out-of-range codepoints in matches • Fixed adoption of Grammar\Pattern objects. • Fixed retrieval of Grammar\RepositoryReferences. --- lib/Grammar.php | 8 ++++++-- lib/Grammar/Pattern.php | 13 ++++++++++++- lib/Grammar/Rule.php | 1 + lib/Grammar/SelfReference.php | 5 +---- lib/Tokenizer.php | 6 ++++-- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/lib/Grammar.php b/lib/Grammar.php index 3518e51..fd9173f 100644 --- a/lib/Grammar.php +++ b/lib/Grammar.php @@ -172,8 +172,6 @@ class Grammar { throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end'); } - $begin = str_replace('/', '\/', $pattern['begin']); - $p['match'] = "/$begin/u"; $modified = true; if (isset($pattern['beginCaptures'])) { @@ -239,9 +237,15 @@ class Grammar { $p[$key] = array_combine($k, $v); $modified = true; break; + case 'begin': case 'match': $value = str_replace('/', '\/', $value); + $value = preg_replace_callback('/\\\(x|o)\{([0-9a-fA-F]{5,})\}/', function($matches) { + $code = substr($matches[2], 0, 4); + return "\\{$matches[1]}{"."$code}"; + }, $value); $p['match'] = "/$value/u"; + $modified = true; break; case 'contentName': diff --git a/lib/Grammar/Pattern.php b/lib/Grammar/Pattern.php index 28fbc91..3afe0df 100644 --- a/lib/Grammar/Pattern.php +++ b/lib/Grammar/Pattern.php @@ -15,7 +15,6 @@ class Pattern extends Rule { protected bool $_endPattern = false; protected ?string $_match; protected ?string $_name; - protected bool $_nameNeedsResolving = false; protected \WeakReference $_ownerGrammar; protected ?array $_patterns; @@ -29,4 +28,16 @@ class Pattern extends Rule { $this->_endPattern = $endPattern; $this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar); } + + // Used when adopting to change the $ownerGrammar property. + public function withOwnerGrammar(Grammar $ownerGrammar): self { + $new = parent::withOwnerGrammar($ownerGrammar); + if ($new->_patterns !== null) { + foreach ($new->_patterns as &$p) { + $p = $p->withOwnerGrammar($ownerGrammar); + } + } + + return $new; + } } \ No newline at end of file diff --git a/lib/Grammar/Rule.php b/lib/Grammar/Rule.php index a1abc0e..f1dd7e7 100644 --- a/lib/Grammar/Rule.php +++ b/lib/Grammar/Rule.php @@ -20,6 +20,7 @@ abstract class Rule { $this->_ownerGrammar = \WeakReference::create($ownerGrammar); } + // Used when adopting to change the $ownerGrammar property. public function withOwnerGrammar(Grammar $ownerGrammar): self { $new = clone $this; diff --git a/lib/Grammar/SelfReference.php b/lib/Grammar/SelfReference.php index 6a877c1..2c6b853 100644 --- a/lib/Grammar/SelfReference.php +++ b/lib/Grammar/SelfReference.php @@ -7,10 +7,7 @@ declare(strict_types=1); namespace dW\Lit\Grammar; use dW\Lit\Grammar; -/** - * A weak reference to a grammar's self. This indeed doesn't have to exist, but - * exists to maintain sanity when checking types. - */ +/** A weak reference to a grammar's self. */ class SelfReference extends Reference { public function __construct(Grammar $grammar) { parent::__construct($grammar); diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index 249bc7d..cf7e8cc 100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -7,7 +7,8 @@ declare(strict_types=1); namespace dW\Lit; use dW\Lit\Grammar\{ Pattern, - Reference + Reference, + RepositoryReference }; use dW\Lit\Scope\{ Filter, @@ -231,11 +232,12 @@ class Tokenizer { // Otherwise, if the rule is a Reference then retrieve its patterns, splice into // the rule list, and reprocess the rule. elseif ($rule instanceof Reference && $obj = $rule->get()) { - if ($obj instanceof Grammar) { + if ($obj instanceof Grammar || $rule instanceof RepositoryReference) { $obj = $obj->patterns; } array_splice($currentRules, $i, 1, $obj); + $currentRulesCount = count($currentRules); continue; }