Browse Source

Scope names now resolve, starting on first line and last line anchors

main
Dustin Wilson 3 years ago
parent
commit
2f7f14dea1
  1. 53
      lib/Grammar.php
  2. 29
      lib/Grammar/CaptureList.php
  3. 83
      lib/Grammar/ImmutableList.php
  4. 14
      lib/Grammar/InjectionList.php
  5. 7
      lib/Grammar/Pattern.php
  6. 13
      lib/Grammar/Repository.php
  7. 4
      lib/Highlight.php
  8. 48
      lib/Tokenizer.php

53
lib/Grammar.php

@ -7,17 +7,12 @@ declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\{
BaseReference,
CaptureList,
ChildGrammarRegistry,
Exception,
FauxReadOnly,
GrammarReference,
InjectionList,
Node,
Pattern,
PatternList,
Reference,
Repository,
RepositoryReference,
SelfReference
};
@ -31,15 +26,15 @@ class Grammar {
use FauxReadOnly;
protected ?string $_contentName;
protected ?string $_firstLineMatch;
protected ?InjectionList $_injections;
protected ?array $_injections;
protected ?string $_name;
protected ?\WeakReference $_ownerGrammar;
protected ?PatternList $_patterns;
protected ?Repository $_repository;
protected ?array $_patterns;
protected ?array $_repository;
protected ?string $_scopeName;
public function __construct(?string $scopeName = null, ?PatternList $patterns = null, ?string $name = null, ?string $firstLineMatch = null, ?InjectionList $injections = null, ?Repository $repository = null, ?Grammar $ownerGrammar = null) {
public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?string $firstLineMatch = null, ?array $injections = null, ?array $repository = null, ?Grammar $ownerGrammar = null) {
$this->_name = $name;
$this->_scopeName = $scopeName;
$this->_patterns = $patterns;
@ -57,19 +52,24 @@ class Grammar {
$new = clone $this;
if ($new->_patterns !== null) {
$new->_patterns = $new->_patterns->withOwnerGrammar($new);
foreach ($new->_patterns as &$p) {
$p = $p->withOwnerGrammar($new);
}
}
if ($new->_injections !== null) {
$new->_injections = $new->_injections->withOwnerGrammar($new);
foreach ($new->_injections as &$i) {
$i = $i->withOwnerGrammar($new);
}
}
if ($new->_repository !== null) {
$new->_repository = $new->_repository->withOwnerGrammar($new);
foreach ($new->_repository as &$r) {
$r = $r->withOwnerGrammar($new);
}
}
$new->_ownerGrammar = \WeakReference::create($ownerGrammar);
ChildGrammarRegistry::set($this->_scopeName, $new);
return $new;
}
@ -110,12 +110,7 @@ class Grammar {
foreach ($json['repository'] as $key => $r) {
$repository[$key] = $this->parseJSONPattern($r, $filename);
}
if (count($repository) > 0) {
$repository = new Repository($repository);
} else {
$repository = null;
}
$repository = (count($repository) > 0) ? $repository : null;
}
$this->_repository = $repository;
@ -127,12 +122,7 @@ class Grammar {
foreach ($json['injections'] as $key => $injection) {
$injections[$key] = $this->parseJSONPattern($injection, $filename);
}
if (count($injections) > 0) {
$injections = new InjectionList($injections);
} else {
$injections = null;
}
$injections = (count($injections) > 0) ? $injections : null;
}
$this->_injections = $injections;
}
@ -154,6 +144,7 @@ class Grammar {
$p = [
'ownerGrammar' => $this,
'name' => null,
'contentName' => null,
'match' => null,
'patterns' => null,
'captures' => null,
@ -173,8 +164,9 @@ class Grammar {
}
// Begin and end matches are handled in this implementation by parsing begin
// matches as regular matches and appending the end match as a pattern to the
// end of the pattern's patterns.
// matches as regular matches and appending the end match as a pattern
// to the pattern's patterns with an end pattern flag turned on
// which is used to exit matching.
if (isset($pattern['begin'])) {
if (!isset($pattern['end'])) {
throw new Exception(Exception::JSON_MISSING_PROPERTY, $filename, 'end');
@ -244,7 +236,7 @@ class Grammar {
return $this->parseJSONPattern($n, $filename);
}, array_values($value));
$p[$key] = new CaptureList(array_combine($k, $v));
$p[$key] = array_combine($k, $v);
$modified = true;
break;
case 'match':
@ -252,6 +244,7 @@ class Grammar {
$p['match'] = "/$value/u";
$modified = true;
break;
case 'contentName':
case 'name':
$p[$key] = $value;
$modified = true;
@ -270,7 +263,7 @@ class Grammar {
return ($modified) ? new Pattern(...$p) : null;
}
protected function parseJSONPatternList(array $list, string $filename): ?PatternList {
protected function parseJSONPatternList(array $list, string $filename): ?array {
$result = [];
foreach ($list as $pattern) {
$p = $this->parseJSONPattern($pattern, $filename);
@ -279,6 +272,6 @@ class Grammar {
}
}
return (count($result) > 0) ? new PatternList(...$result) : null;
return (count($result) > 0) ? $result : null;
}
}

29
lib/Grammar/CaptureList.php

@ -1,29 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
/**
 * An immutable list of capture rules indexed by integer key.
 *
 * Every value must be a Pattern, PatternList or Reference; anything else is
 * rejected when the list is constructed.
 */
class CaptureList extends ImmutableList {
    /**
     * @param array $array Capture rules indexed by integer key.
     *
     * @throws Exception LIST_INVALID_TYPE when a key is not an integer or a
     *                   value is not a Pattern, PatternList or Reference.
     */
    public function __construct(array $array) {
        // PHP has no typed arrays or generics, so keys and values have to be
        // validated by hand.
        foreach ($array as $k => $v) {
            if (!is_int($k)) {
                throw new Exception(Exception::LIST_INVALID_TYPE, 'Integer', 'supplied array index', gettype($k));
            }

            if (!$v instanceof Pattern && !$v instanceof PatternList && !$v instanceof Reference) {
                $type = is_object($v) ? get_class($v) : gettype($v);
                throw new Exception(Exception::LIST_INVALID_TYPE, __NAMESPACE__.'\Pattern, '.__NAMESPACE__.'\PatternList, '.__NAMESPACE__.'\Reference', 'supplied array value', $type);
            }
        }

        $this->storage = $array;
        // The parent constructor is deliberately not called (it takes variadic
        // values rather than a keyed array), so the cached element count that
        // count() reports has to be set here; otherwise it would stay at 0.
        $this->count = count($array);
    }
}

83
lib/Grammar/ImmutableList.php

@ -1,83 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
 * Base class for read-only, array-backed lists.
 *
 * Elements can be read through array access, counted and iterated with
 * foreach; any attempt to set or unset an offset throws an Exception.
 */
abstract class ImmutableList implements \ArrayAccess, \Countable, \Iterator {
    /**
     * Element count cached by the constructor. Kept for subclasses that may
     * read it; count() itself no longer depends on it.
     */
    protected int $count = 0;
    /**
     * Key the iterator currently points at, or null when it is not positioned
     * on an element (before rewind() or after the last element).
     */
    protected int|string|null $position = null;
    /** The list's elements. */
    protected array $storage = [];


    /**
     * @param mixed ...$values The elements of the list, in order.
     */
    public function __construct(...$values) {
        $this->storage = $values;
        $this->count = count($this->storage);
    }


    /**
     * Used when adopting to change the $ownerGrammar property of items in the
     * list. Returns a clone; the original list is left untouched.
     */
    public function withOwnerGrammar(Grammar $ownerGrammar): self {
        $new = clone $this;
        foreach ($new->storage as &$s) {
            $s = $s->withOwnerGrammar($ownerGrammar);
        }
        // Break the reference so the last element cannot be overwritten by a
        // later use of $s.
        unset($s);

        return $new;
    }


    /**
     * Number of elements in the list.
     *
     * Counted from the storage directly so that subclasses which assign
     * $storage without updating $count still report the right size.
     */
    public function count(): int {
        return count($this->storage);
    }

    // The methods below intentionally carry no native return types: adding
    // them would break subclasses that override these methods without types.
    // The attribute silences the PHP 8.1+ deprecation notice instead.

    #[\ReturnTypeWillChange]
    public function current() {
        return current($this->storage);
    }

    /** Returns the underlying array of elements. */
    public function getIterator(): array {
        return $this->storage;
    }

    #[\ReturnTypeWillChange]
    public function key() {
        $this->position = key($this->storage);
        return $this->position;
    }

    #[\ReturnTypeWillChange]
    public function next() {
        next($this->storage);
        $this->position = key($this->storage);
    }

    #[\ReturnTypeWillChange]
    public function offsetExists($offset) {
        return isset($this->storage[$offset]);
    }

    /**
     * @throws Exception LIST_INVALID_INDEX when the offset is not set.
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset) {
        if (!isset($this->storage[$offset])) {
            throw new Exception(Exception::LIST_INVALID_INDEX, __CLASS__, $offset);
        }

        return $this->storage[$offset];
    }

    /**
     * @throws Exception LIST_IMMUTABLE always; the list cannot be modified.
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value) {
        throw new Exception(Exception::LIST_IMMUTABLE, __CLASS__);
    }

    /**
     * @throws Exception LIST_IMMUTABLE always; the list cannot be modified.
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset) {
        throw new Exception(Exception::LIST_IMMUTABLE, __CLASS__);
    }

    #[\ReturnTypeWillChange]
    public function rewind() {
        reset($this->storage);
        $this->position = key($this->storage);
    }

    #[\ReturnTypeWillChange]
    public function valid() {
        // A null position means the iterator is not on an element; checking
        // it first avoids using null as an array offset.
        return $this->position !== null && $this->offsetExists($this->position);
    }
}

14
lib/Grammar/InjectionList.php

@ -1,14 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
/**
 * An immutable list of injection pattern rules. Injections allow for the
 * creation of a new grammar which, rather than applying to an entire file, is
 * applied to a specific scope selector.
 */
class InjectionList extends NamedPatternList {}

7
lib/Grammar/Pattern.php

@ -10,16 +10,17 @@ use dW\Lit\Grammar;
/** Contains patterns responsible for matching a portion of the document */
class Pattern extends Rule {
protected ?CaptureList $_captures;
protected ?array $_captures;
protected ?string $_contentName;
protected bool $_endPattern = false;
protected ?string $_match;
protected ?string $_name;
protected bool $_nameNeedsResolving = false;
protected \WeakReference $_ownerGrammar;
protected ?PatternList $_patterns;
protected ?array $_patterns;
public function __construct(Grammar $ownerGrammar, ?string $name = null, ?string $contentName = null, ?string $match = null, ?PatternList $patterns = null, ?CaptureList $captures = null, bool $endPattern = false) {
public function __construct(Grammar $ownerGrammar, ?string $name = null, ?string $contentName = null, ?string $match = null, ?array $patterns = null, ?array $captures = null, bool $endPattern = false) {
$this->_name = $name;
$this->_contentName = $contentName;
$this->_match = $match;

13
lib/Grammar/Repository.php

@ -1,13 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
/**
 * An immutable list of rules which can be included from other places in the
 * grammar. The key is the name of the rule and the value is the rule itself.
 */
class Repository extends NamedPatternList {}

4
lib/Highlight.php

@ -28,9 +28,7 @@ class Highlight {
$tokenList = $tokenizer->tokenize();
foreach ($tokenList as $lineNumber => $tokens) {
if ($lineNumber === 2) {
die(var_export($tokens));
}
die(var_export($tokens));
//echo "$lineNumber: $line\n";
}
}

48
lib/Tokenizer.php

@ -43,25 +43,31 @@ class Tokenizer {
substr($line, $this->offset, $lineLength)
);
}
yield $lineNumber => $tokens;
}
}
protected function getMatch(string $regex, string $line): ?array {
if (preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE, $this->offset) !== 1) {
return null;
}
return $match;
/**
 * Expands capture placeholders in a scope name using a regex match.
 *
 * Two placeholder forms are recognised:
 *   - `$N`              is replaced by the text of capture group N;
 *   - `${N:/downcase}`  and `${N:/upcase}` are replaced by the text of
 *                       capture group N converted to lower/upper case.
 *
 * When the match has no entry for group N, the number itself is substituted.
 *
 * @param string $scopeName The scope name, possibly containing placeholders.
 * @param array  $match     Match array as produced by preg_match() with
 *                          PREG_OFFSET_CAPTURE: each entry is [text, offset].
 *
 * @return string The scope name with all placeholders expanded.
 */
protected function resolveScopeName(string $scopeName, array $match): string {
    $resolved = preg_replace_callback(
        '/\$(?:(\d+)|\{(\d+):\/(downcase|upcase)\})/',
        static function (array $m) use ($match): string {
            // Group 1 holds the number for the `$N` form. For the
            // `${N:/command}` form group 1 is empty, the number is in group 2
            // and the command in group 3.
            $index = (($m[2] ?? '') !== '') ? $m[2] : $m[1];
            $command = $m[3] ?? null;
            $replacement = $match[(int)$index][0] ?? $index;

            return match ($command) {
                'downcase' => strtolower($replacement),
                'upcase' => strtoupper($replacement),
                default => $replacement,
            };
        },
        $scopeName
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // unresolved name rather than violating the string return type.
    return $resolved ?? $scopeName;
}
protected function tokenizeLine(string $line): array {
$tokens = [];
while (true) {
$currentRules = end($this->ruleStack)->patterns->getIterator();
$currentRules = end($this->ruleStack)->patterns;
$currentRulesCount = count($currentRules);
for ($i = 0; $i < $currentRulesCount; $i++) {
@ -69,14 +75,14 @@ class Tokenizer {
$rule = $currentRules[$i];
// If the rule is a Pattern and matches the line at the offset then tokenize the
// matches.
if ($rule instanceof Pattern && $match = $this->getMatch($rule->match, $line)) {
if ($rule instanceof Pattern && preg_match($rule->match, $line, $match, PREG_OFFSET_CAPTURE, $this->offset)) {
// Add the name and contentName to the scope stack
// if present.
if ($rule->name !== null) {
$this->scopeStack[] = $rule->name;
$this->scopeStack[] = $this->resolveScopeName($rule->name, $match);
}
if ($rule->contentName !== null) {
$this->scopeStack[] = $rule->contentName;
$this->scopeStack[] = $this->resolveScopeName($rule->contentName, $match);
}
$wholeMatchCaptureScopeCount = 0;
@ -121,11 +127,11 @@ class Tokenizer {
}
if ($rule->captures[0]->name !== null) {
$this->scopeStack[] = $rule->captures[0]->name;
$this->scopeStack[] = $this->resolveScopeName($rule->captures[0]->name, $match);
$wholeMatchCaptureScopeCount++;
}
if ($rule->captures[0]->contentName !== null) {
$this->scopeStack[] = $rule->captures[0]->contentName;
$this->scopeStack[] = $this->resolveScopeName($rule->captures[0]->contentName, $match);
$wholeMatchCaptureScopeCount++;
}
}
@ -138,11 +144,11 @@ class Tokenizer {
// The scope stack for the whole match is handled above, so only handle that for
// other captures.
if ($k !== 0) {
if ($rule->captures->name !== null) {
$this->scopeStack[] = $rule->captures[$k]->name;
if ($rule->captures[$k]->name !== null) {
$this->scopeStack[] = $this->resolveScopeName($rule->captures[$k]->name, $match);
}
if ($rule->captures->contentName !== null) {
$this->scopeStack[] = $rule->captures[$k]->contentName;
if ($rule->captures[$k]->contentName !== null) {
$this->scopeStack[] = $this->resolveScopeName($rule->captures[$k]->contentName, $match);
}
}
@ -162,7 +168,7 @@ class Tokenizer {
array_pop($this->ruleStack);
} else {
$tokens[] = new Token(
[ ...$this->scopeStack, $rule->captures[$k]->name ],
[ ...$this->scopeStack, $this->resolveScopeName($rule->captures[$k]->name, $match) ],
$m[0]
);
}
@ -200,10 +206,8 @@ class Tokenizer {
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into
// the rule list, and reprocess the rule.
elseif ($rule instanceof Reference && $obj = $rule->get()) {
if ($obj instanceof PatternList) {
$obj = $obj->getIterator();
} elseif ($obj instanceof Grammar) {
$obj = $obj->patterns->getIterator();
if ($obj instanceof Grammar) {
$obj = $obj->patterns;
}
array_splice($currentRules, $i, 1, $obj);

Loading…
Cancel
Save