Browse Source

Various fixes; tokenization, however, now ends up in an infinite loop :/

• Added preliminary transformation of out-of-range codepoints in matches.
• Fixed adoption of Grammar\Pattern objects.
• Fixed retrieval of Grammar\RepositoryReferences.
main
Dustin Wilson 3 years ago
parent
commit
4f09139e3b
  1. 8
      lib/Grammar.php
  2. 13
      lib/Grammar/Pattern.php
  3. 1
      lib/Grammar/Rule.php
  4. 5
      lib/Grammar/SelfReference.php
  5. 6
      lib/Tokenizer.php

8
lib/Grammar.php

@ -172,8 +172,6 @@ class Grammar {
throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end'); throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end');
} }
$begin = str_replace('/', '\/', $pattern['begin']);
$p['match'] = "/$begin/u";
$modified = true; $modified = true;
if (isset($pattern['beginCaptures'])) { if (isset($pattern['beginCaptures'])) {
@ -239,9 +237,15 @@ class Grammar {
$p[$key] = array_combine($k, $v); $p[$key] = array_combine($k, $v);
$modified = true; $modified = true;
break; break;
case 'begin':
case 'match': case 'match':
$value = str_replace('/', '\/', $value); $value = str_replace('/', '\/', $value);
$value = preg_replace_callback('/\\\(x|o)\{([0-9a-fA-F]{5,})\}/', function($matches) {
$code = substr($matches[2], 0, 4);
return "\\{$matches[1]}{"."$code}";
}, $value);
$p['match'] = "/$value/u"; $p['match'] = "/$value/u";
$modified = true; $modified = true;
break; break;
case 'contentName': case 'contentName':

13
lib/Grammar/Pattern.php

@ -15,7 +15,6 @@ class Pattern extends Rule {
protected bool $_endPattern = false; protected bool $_endPattern = false;
protected ?string $_match; protected ?string $_match;
protected ?string $_name; protected ?string $_name;
protected bool $_nameNeedsResolving = false;
protected \WeakReference $_ownerGrammar; protected \WeakReference $_ownerGrammar;
protected ?array $_patterns; protected ?array $_patterns;
@ -29,4 +28,16 @@ class Pattern extends Rule {
$this->_endPattern = $endPattern; $this->_endPattern = $endPattern;
$this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar); $this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar);
} }
/**
 * Returns a copy of this pattern that is owned by the supplied grammar.
 *
 * Used when adopting to change the $ownerGrammar property. The copy itself is
 * produced by the parent implementation; when the copy has a pattern list,
 * every entry in it is replaced by the result of calling withOwnerGrammar()
 * on that entry with the same grammar.
 */
public function withOwnerGrammar(Grammar $ownerGrammar): self {
    $adopted = parent::withOwnerGrammar($ownerGrammar);

    // Nothing nested to re-home when there is no pattern list.
    if ($adopted->_patterns === null) {
        return $adopted;
    }

    // array_map() with a single array keeps the original keys, so the list
    // layout is unchanged; only the entries are swapped for their adopted copies.
    $adopted->_patterns = array_map(
        static fn($child) => $child->withOwnerGrammar($ownerGrammar),
        $adopted->_patterns
    );

    return $adopted;
}
} }

1
lib/Grammar/Rule.php

@ -20,6 +20,7 @@ abstract class Rule {
$this->_ownerGrammar = \WeakReference::create($ownerGrammar); $this->_ownerGrammar = \WeakReference::create($ownerGrammar);
} }
// Used when adopting to change the $ownerGrammar property. // Used when adopting to change the $ownerGrammar property.
public function withOwnerGrammar(Grammar $ownerGrammar): self { public function withOwnerGrammar(Grammar $ownerGrammar): self {
$new = clone $this; $new = clone $this;

5
lib/Grammar/SelfReference.php

@ -7,10 +7,7 @@ declare(strict_types=1);
namespace dW\Lit\Grammar; namespace dW\Lit\Grammar;
use dW\Lit\Grammar; use dW\Lit\Grammar;
/** /** A weak reference to a grammar's self. */
* A weak reference to a grammar's self. This indeed doesn't have to exist, but
* exists to maintain sanity when checking types.
*/
class SelfReference extends Reference { class SelfReference extends Reference {
public function __construct(Grammar $grammar) { public function __construct(Grammar $grammar) {
parent::__construct($grammar); parent::__construct($grammar);

6
lib/Tokenizer.php

@ -7,7 +7,8 @@ declare(strict_types=1);
namespace dW\Lit; namespace dW\Lit;
use dW\Lit\Grammar\{ use dW\Lit\Grammar\{
Pattern, Pattern,
Reference Reference,
RepositoryReference
}; };
use dW\Lit\Scope\{ use dW\Lit\Scope\{
Filter, Filter,
@ -231,11 +232,12 @@ class Tokenizer {
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into // Otherwise, if the rule is a Reference then retrieve its patterns, splice into
// the rule list, and reprocess the rule. // the rule list, and reprocess the rule.
elseif ($rule instanceof Reference && $obj = $rule->get()) { elseif ($rule instanceof Reference && $obj = $rule->get()) {
if ($obj instanceof Grammar) { if ($obj instanceof Grammar || $rule instanceof RepositoryReference) {
$obj = $obj->patterns; $obj = $obj->patterns;
} }
array_splice($currentRules, $i, 1, $obj); array_splice($currentRules, $i, 1, $obj);
$currentRulesCount = count($currentRules); $currentRulesCount = count($currentRules);
continue; continue;
} }

Loading…
Cancel
Save