Browse Source

Various fixes; however, tokenization now gets stuck in an infinite loop :/

• Added preliminary transformation of out-of-range codepoints in matches.
• Fixed adoption of Grammar\Pattern objects.
• Fixed retrieval of Grammar\RepositoryReferences.
main
Dustin Wilson 3 years ago
parent
commit
4f09139e3b
  1. 8
      lib/Grammar.php
  2. 13
      lib/Grammar/Pattern.php
  3. 1
      lib/Grammar/Rule.php
  4. 5
      lib/Grammar/SelfReference.php
  5. 6
      lib/Tokenizer.php

8
lib/Grammar.php

@ -172,8 +172,6 @@ class Grammar {
throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end');
}
$begin = str_replace('/', '\/', $pattern['begin']);
$p['match'] = "/$begin/u";
$modified = true;
if (isset($pattern['beginCaptures'])) {
@ -239,9 +237,15 @@ class Grammar {
$p[$key] = array_combine($k, $v);
$modified = true;
break;
case 'begin':
case 'match':
$value = str_replace('/', '\/', $value);
$value = preg_replace_callback('/\\\(x|o)\{([0-9a-fA-F]{5,})\}/', function($matches) {
$code = substr($matches[2], 0, 4);
return "\\{$matches[1]}{"."$code}";
}, $value);
$p['match'] = "/$value/u";
$modified = true;
break;
case 'contentName':

13
lib/Grammar/Pattern.php

@ -15,7 +15,6 @@ class Pattern extends Rule {
protected bool $_endPattern = false;
protected ?string $_match;
protected ?string $_name;
protected bool $_nameNeedsResolving = false;
protected \WeakReference $_ownerGrammar;
protected ?array $_patterns;
@ -29,4 +28,16 @@ class Pattern extends Rule {
$this->_endPattern = $endPattern;
$this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar);
}
// Used when adopting to change the $ownerGrammar property.
public function withOwnerGrammar(Grammar $ownerGrammar): self {
$new = parent::withOwnerGrammar($ownerGrammar);
if ($new->_patterns !== null) {
foreach ($new->_patterns as &$p) {
$p = $p->withOwnerGrammar($ownerGrammar);
}
}
return $new;
}
}

1
lib/Grammar/Rule.php

@ -20,6 +20,7 @@ abstract class Rule {
$this->_ownerGrammar = \WeakReference::create($ownerGrammar);
}
// Used when adopting to change the $ownerGrammar property.
public function withOwnerGrammar(Grammar $ownerGrammar): self {
$new = clone $this;

5
lib/Grammar/SelfReference.php

@ -7,10 +7,7 @@ declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
* A weak reference to a grammar's self. This indeed doesn't have to exist, but
* exists to maintain sanity when checking types.
*/
/** A weak reference to a grammar's self. */
class SelfReference extends Reference {
public function __construct(Grammar $grammar) {
parent::__construct($grammar);

6
lib/Tokenizer.php

@ -7,7 +7,8 @@ declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\{
Pattern,
Reference
Reference,
RepositoryReference
};
use dW\Lit\Scope\{
Filter,
@ -231,11 +232,12 @@ class Tokenizer {
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into
// the rule list, and reprocess the rule.
elseif ($rule instanceof Reference && $obj = $rule->get()) {
if ($obj instanceof Grammar) {
if ($obj instanceof Grammar || $rule instanceof RepositoryReference) {
$obj = $obj->patterns;
}
array_splice($currentRules, $i, 1, $obj);
$currentRulesCount = count($currentRules);
continue;
}

Loading…
Cancel
Save