From 40b205fdd29a215681d4a4eafef66df84d8b619e Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 8 Feb 2022 00:38:42 -0600 Subject: [PATCH] Fixed a few bugs when testing sass grammar tokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • I -really- hate debugging this because there's no reference to go by to ensure things are correct except trial and error. • Sometimes, when resolving scope names, the wrong match would end up in the name. • Because of how references are handled in this implementation, sometimes there'd be a leftover pattern containing a single reference when popping off the rule and scope stacks. It would cause havoc, so a workaround is needed to circumvent that. It can probably be simplified in the future, because checking against the end pattern the way it does now probably isn't necessary, but this works at present. --- lib/Grammar.php | 5 +++-- lib/Tokenizer.php | 36 ++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lib/Grammar.php b/lib/Grammar.php index 2b6ecbf..acbbab3 100644 --- a/lib/Grammar.php +++ b/lib/Grammar.php @@ -95,8 +95,9 @@ class Grammar { protected function parseJSONPattern(array $pattern, string $filename, bool $isInjection = false): Pattern|Reference|null { if (isset($pattern['include'])) { - if ($pattern['include'][0] === '#') { - return new RepositoryReference(substr($pattern['include'], 1), $this->_scopeName); + if (str_contains(needle: '#', haystack: $pattern['include'])) { + $ref = explode('#', $pattern['include']); + return new RepositoryReference($ref[1], ($ref[0] === '') ? 
$this->_scopeName : $ref[0]); } elseif ($pattern['include'] === '$base') { return new BaseReference($this->_scopeName); } elseif ($pattern['include'] === '$self') { diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index 6d5ee30..2db93ea 100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -22,6 +22,7 @@ class Tokenizer { // Used for debugging; assertions (`ini_set('zend.assertions', '1')`) must be // enabled to see debug output. public static bool $debug = false; + public static int $debugCount = 0; // The input Data class. protected Data $data; // The supplied Grammar used to highlight the input data. @@ -110,8 +111,8 @@ class Tokenizer { protected function resolveScopeName(string $scopeName, array $match): string { - return preg_replace_callback(self::SCOPE_RESOLVE_REGEX, function($m) use($match) { - $replacement = trim($match[(int)$m[1]][0] ?? $m[1]); + return preg_replace_callback(self::SCOPE_RESOLVE_REGEX, function($m) use($scopeName, $match) { + $replacement = trim($match[(int)(($m[1] !== '') ? $m[1] : $m[2])][0] ?? $m[1]); $command = $m[2] ?? null; switch ($command) { case 'downcase': return strtolower($replacement); @@ -271,16 +272,7 @@ class Tokenizer { } array_splice($tokens, $i, 1, $t); - - // Find the nearest index to the match that doesn't have an invalid offset value - // (meaning that particular capture matched nothing) and set the offset to the - // end of that match. - $j = count($match) - 2; - while ($match[$j][1] === -1 || $match[$j][1] === null) { - $j--; - } - - $this->offset = $match[$j][1] + strlen($match[$j][0]); + $this->offset = max($this->offset, $m[1] + strlen($m[0])); break; } } @@ -362,11 +354,31 @@ class Tokenizer { } $popped = array_pop($this->ruleStack); + // Pop the rule's name from the stack. if ($popped->name !== null) { array_pop($this->scopeStack); } + // This seems to be necessary sometimes because of how references are built + // in this implementation. 
They're sometimes made into patterns with a + // single subpattern. This causes some issues when popping off the stacks, so + // circumvent this bit of bullshittery below... *crosses fingers* + $end = end($this->ruleStack); + if ($end instanceof Pattern && !$end->beginPattern && !$end->endPattern && $end->match === null && $end->patterns[0] instanceof RepositoryReference) { + $rep = $end->patterns[0]->get(); + if ($rep->patterns[0]->endPattern) { + $mmm = $this->findClosestMatch($rep); + if ($mmm['pattern'] === $rep->patterns[0]) { + array_pop($this->ruleStack); + + if ($rep->name !== null) { + array_pop($this->scopeStack); + } + } + } + } + // If what was just popped is the active injection then remove it, too. if ($popped->injection) { $this->activeInjection = false;