Browse Source

Tokenizing stuff... maybe? :)

main
Dustin Wilson 3 years ago
parent
commit
1763653eca
  1. 1
      lib/Data.php
  2. 1
      lib/FauxReadOnly.php
  3. 45
      lib/Grammar.php
  4. 4
      lib/Grammar/GrammarReference.php
  5. 4
      lib/Grammar/ImmutableList.php
  6. 2
      lib/Grammar/InjectionList.php
  7. 7
      lib/Grammar/NamedPatternList.php
  8. 1
      lib/Grammar/PatternList.php
  9. 4
      lib/Grammar/Reference.php
  10. 2
      lib/Grammar/Repository.php
  11. 13
      lib/Grammar/RepositoryReference.php
  12. 31
      lib/Grammar/SelfReference.php
  13. 1
      lib/GrammarRegistry.php
  14. 1
      lib/Highlight.php
  15. 77
      lib/Tokenizer.php

1
lib/Data.php

@ -6,6 +6,7 @@
declare(strict_types=1);
namespace dW\Lit;
class Data {
public static function fileToGenerator(string $filepath): \Generator {
$lineNumber = 0;

1
lib/FauxReadOnly.php

@ -6,6 +6,7 @@
declare(strict_types=1);
namespace dW\Lit;
trait FauxReadOnly {
public function __get(string $name) {
$prop = "_$name";

45
lib/Grammar.php

@ -5,17 +5,19 @@
declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\BaseReference,
dW\Lit\Grammar\CaptureList,
dW\Lit\Grammar\Exception,
dW\Lit\Grammar\GrammarReference,
dW\Lit\Grammar\InjectionList,
dW\Lit\Grammar\Pattern,
dW\Lit\Grammar\PatternList,
dW\Lit\Grammar\Reference,
dW\Lit\Grammar\Repository,
dW\Lit\Grammar\RepositoryReference,
dW\Lit\Grammar\SelfReference;
use dW\Lit\Grammar\{
BaseReference,
CaptureList,
Exception,
GrammarReference,
InjectionList,
Pattern,
PatternList,
Reference,
Repository,
RepositoryReference,
SelfReference
};
/**
@ -50,7 +52,7 @@ class Grammar {
/** Clones the supplied grammar with this grammar set as its owner grammar */
public function adoptGrammar(self $grammar): self {
return new self($grammar->name, $grammar->scopeName, $grammar->contentScopeName, $grammar->patterns, $grammar->contentRegex, $grammar->firstLineMatch, $grammar->injections, $grammar->repository, $this);
return new self($grammar->scopeName, $grammar->contentScopeName, $grammar->patterns, $grammar->name, $grammar->contentRegex, $grammar->firstLineMatch, $grammar->injections, $grammar->repository, $this);
}
@ -76,8 +78,20 @@ class Grammar {
$this->_name = $json['name'] ?? null;
$this->_scopeName = $json['scopeName'];
$this->_contentScopeName = $json['contentScopeName'] ?? null;
$this->_contentRegex = (isset($json['contentRegex'])) ? "/{$json['contentRegex']}/" : null;
$this->_firstLineMatch = (isset($json['firstLineMatch'])) ? "/{$json['firstLineMatch']}/" : null;
if (isset($json['contentRegex'])) {
$value = str_replace('/', '\/', $json['contentRegex']);
$this->_contentRegex = $value;
} else {
$this->_contentRegex = null;
}
if (isset($json['firstLineMatch'])) {
$value = str_replace('/', '\/', $json['firstLineMatch']);
$this->_firstLineMatch = $value;
} else {
$this->_firstLineMatch = null;
}
$repository = null;
if (isset($json['repository'])) {
@ -120,7 +134,7 @@ class Grammar {
} elseif ($pattern['include'] === '$base') {
return new BaseReference($this);
} elseif ($pattern['include'] === '$self') {
return \WeakReference::create($this);
return SelfReference::create($this);
} else {
return new GrammarReference($pattern['include'], $this);
}
@ -156,6 +170,7 @@ class Grammar {
case 'begin':
case 'end':
case 'match':
$value = str_replace('/', '\/', $value);
$p[$key] = "/$value/";
$modified = true;
break;

4
lib/Grammar/GrammarReference.php

@ -13,7 +13,7 @@ use dW\Lit\Grammar,
* Acts as a sort of lazy reference for entire grammars in grammars.
*/
class GrammarReference extends Reference {
protected ?Grammar $object;
protected ?Grammar $object = null;
protected \WeakReference $ownerGrammar;
protected string $_scopeName;
@ -37,7 +37,7 @@ class GrammarReference extends Reference {
return null;
}
$this->object = $this->ownerGrammar->get()->adopt($grammar);
$this->object = $this->ownerGrammar->get()->adoptGrammar($grammar);
return $this->object;
}
}

4
lib/Grammar/ImmutableList.php

@ -26,6 +26,10 @@ abstract class ImmutableList implements \ArrayAccess, \Countable, \Iterator {
return current($this->storage);
}
/**
* Returns the array held in this list's storage property.
*
* NOTE(review): despite the name, this returns a plain array and not an
* \Iterator/\Traversable object.
*/
public function getIterator(): array {
return $this->storage;
}
public function key(){
$this->position = key($this->storage);
return $this->position;

2
lib/Grammar/InjectionList.php

@ -11,4 +11,4 @@ namespace dW\Lit\Grammar;
* new grammar; rather than applying to an entire file, it's applied to a
* specific scope selector.
*/
class InjectionList extends NamedPatternListList {}
class InjectionList extends NamedPatternList {}

7
lib/Grammar/NamedPatternListList.php → lib/Grammar/NamedPatternList.php

@ -7,7 +7,8 @@ declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
abstract class NamedPatternListList extends ImmutableList {
/** Immutable named pattern list used for repositories and injection lists. */
abstract class NamedPatternList extends ImmutableList {
use FauxReadOnly;
public function __construct(array $array) {
@ -17,13 +18,13 @@ abstract class NamedPatternListList extends ImmutableList {
throw new Exception(Exception::LIST_INVALID_TYPE, 'String', 'supplied array index', gettype($k));
}
if (!$v instanceof Pattern && !$v instanceof PatternList && !$v instanceof Reference && !$v instanceof \WeakReference) {
if (!$v instanceof Pattern && !$v instanceof PatternList && !$v instanceof Reference) {
$type = gettype($v);
if ($type === 'object') {
$type = get_class($v);
}
throw new Exception(Exception::LIST_INVALID_TYPE, __NAMESPACE__.'\Pattern, '.__NAMESPACE__.'\PatternList, '.__NAMESPACE__.'\Reference, or \WeakReference', 'supplied array value', $type);
throw new Exception(Exception::LIST_INVALID_TYPE, __NAMESPACE__.'\Pattern, '.__NAMESPACE__.'\PatternList, '.__NAMESPACE__.'\Reference', 'supplied array value', $type);
}
}

1
lib/Grammar/PatternList.php

@ -7,6 +7,7 @@ declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/** Immutable list of pattern rules */
class PatternList extends ImmutableList {
public function __construct(Pattern|Reference|\WeakReference ...$values) {

4
lib/Grammar/Reference.php

@ -6,7 +6,5 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
/**
* Acts as a sort of lazy reference for including self in a grammar.
*/
/** Acts as a catch-all type for references */
abstract class Reference extends Rule {}

2
lib/Grammar/Repository.php

@ -10,4 +10,4 @@ namespace dW\Lit\Grammar;
* An immutable list of rules which can be included from other places in the
* grammar; The key is the name of the rule and the value is the actual rule.
*/
class Repository extends NamedPatternListList {}
class Repository extends NamedPatternList {}

13
lib/Grammar/RepositoryReference.php

@ -12,25 +12,28 @@ use dW\Lit\Grammar;
* Acts as a sort of lazy reference for repository items in grammars.
*/
class RepositoryReference extends Reference {
protected \WeakReference $grammar;
protected ?Grammar $grammar;
protected string $_name;
protected PatternList|Pattern|null|false $object;
protected PatternList|Pattern|null|false $object = null;
public function __construct(string $name, Grammar $grammar) {
$this->_name = $name;
$this->grammar = \WeakReference::create($grammar);
// Using a \WeakReference here doesn't work for some reason, even though
// the grammar would still be stored in memory. Cloning works because grammars
// are immutable, so the referenced object will never change.
$this->grammar = clone $grammar;
}
public function get(): PatternList|Pattern {
public function get(): PatternList|Pattern|null {
if ($this->object !== null) {
return $this->object;
} elseif ($this->object === false) {
return null;
}
$grammar = $this->grammar->get();
$grammar = $this->grammar;
if (!isset($grammar->repository[$this->name])) {
$this->object = false;
return null;

31
lib/Grammar/SelfReference.php

@ -0,0 +1,31 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
 * A reference to a grammar's own self, produced for `$self` includes. It holds
 * the grammar directly rather than through a \WeakReference; it doesn't strictly
 * have to exist, but giving self includes the same Reference base type as the
 * other reference kinds keeps type checks sane.
 */
class SelfReference extends Reference {
    /** The grammar this reference resolves to; cleared when the reference is destroyed */
    protected ?Grammar $grammar;


    public function __construct(Grammar $grammar) {
        $this->grammar = $grammar;
    }

    /**
     * Named constructor mirroring \WeakReference::create(), so a call written as
     * SelfReference::create($grammar) resolves to a real method.
     *
     * @param Grammar $grammar The grammar the reference should resolve to
     */
    public static function create(Grammar $grammar): static {
        return new static($grammar);
    }

    public function __destruct() {
        // Only chain up when an ancestor really declares a destructor; calling
        // an undefined parent::__destruct() throws an Error.
        // NOTE(review): the Rule base class is not visible here -- confirm
        // whether it defines __destruct().
        if (method_exists(parent::class, '__destruct')) {
            parent::__destruct();
        }

        $this->grammar = null;
    }

    /** Returns the grammar this reference was created with */
    public function get(): Grammar {
        return $this->grammar;
    }
}

1
lib/GrammarRegistry.php

@ -6,6 +6,7 @@
declare(strict_types=1);
namespace dW\Lit;
/** Static storage for grammars; a map of a scope string and a Grammar object */
class GrammarRegistry implements \IteratorAggregate {
protected static array $storage = [];

1
lib/Highlight.php

@ -7,6 +7,7 @@ declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\Exception;
class Highlight {
public static function withFile(string $filepath, string $scopeName) {
return self::highlight(Data::fileToGenerator($filepath), $scopeName);

77
lib/Tokenizer.php

@ -5,28 +5,93 @@
declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Scope\Parser as ScopeParser;
use dW\Lit\Grammar\{
Pattern,
Reference
};
use dW\Lit\Scope\Parser as ScopeParser,
dW\Lit\Grammar\Pattern,
dW\Lit\Grammar\RepositoryReference;
class Tokenizer {
protected \Generator $data;
protected Grammar $grammar;
protected array $ruleStack;
protected array $scopeStack;
/**
 * Stores the input and grammar and seeds both stacks: the rule stack starts
 * with the grammar itself, the scope stack with the grammar's scope name
 * followed by its content scope name when that is not null.
 *
 * @param \Generator $data The input the tokenizer iterates over
 * @param Grammar $grammar The grammar whose patterns are applied
 */
public function __construct(\Generator $data, Grammar $grammar) {
    $this->data = $data;
    $this->grammar = $grammar;
    $this->ruleStack = [ $grammar ];

    $initialScopes = [ $grammar->scopeName ];
    $contentScope = $grammar->contentScopeName;
    if ($contentScope !== null) {
        $initialScopes[] = $contentScope;
    }

    $this->scopeStack = $initialScopes;
}
/**
* Walks the input line by line and tries the patterns of the rule on top of
* the rule stack against each line, recording the scope stack and the match
* for every pattern whose regex matches. References found in the pattern list
* are replaced in place by what they resolve to and then re-examined.
*
* NOTE(review): this span appears to contain both the previous body (the first
* foreach and its yield) and the new body; with both present the braces of the
* method do not balance -- confirm which lines are live.
*/
public function tokenize(): \Generator {
// NOTE(review): presumably the pre-change loop; it is never closed within this method.
foreach ($this->data as $lineNumber => $line) {
yield $lineNumber => $line;
foreach ($this->data as $lineNumber => $inputLine) {
// Patterns of whatever is currently on top of the rule stack
$currentRules = end($this->ruleStack)->patterns->getIterator();
$currentRulesCount = count($currentRules);
// NOTE(review): $results is filled below but never yielded or returned in the visible code.
$results = [];
$line = $inputLine;
for ($i = 0; $i < $currentRulesCount; $i++) {
// Loops so that an entry which was a reference is looked at again after being expanded
while (true) {
$rule = $currentRules[$i];
if ($rule instanceof Pattern) {
// Use the match regex when there is one, otherwise the begin regex
$regex = null;
if ($rule->match !== null) {
$regex = $rule->match;
} elseif ($rule->begin !== null) {
$regex = $rule->begin;
}
if ($regex !== null && $match = $this->getMatch($regex, $line)) {
// Work on a copy so the tokenizer's own scope stack is untouched unless a begin rule is entered
$scopeStack = $this->scopeStack;
// NOTE(review): this class declares no "rule" property among the visible ones; $rule was presumably intended here and below.
if ($this->rule->name !== null) {
$scopeStack[] = $this->rule->name;
}
if ($this->rule->contentName !== null) {
$scopeStack[] = $this->rule->contentName;
}
$results[] = [
'scopeStack' => $scopeStack,
// NOTE(review): $matches is never assigned in this method; $match was presumably intended.
'matches' => $matches
];
if ($rule->begin !== null) {
// Entering a begin rule: it becomes the current rule for following lines
$this->ruleStack[] = $rule;
// NOTE(review): this appends the whole local array as a single element, while the constructor pushes individual scope names -- confirm whether an assignment was meant.
$this->scopeStack[] = $scopeStack;
}
}
} elseif ($rule instanceof Reference && $obj = $rule->get()) {
// NOTE(review): PatternList is not among the visible use imports of this file, so this name would resolve inside the dW\Lit namespace -- confirm.
if ($obj instanceof PatternList) {
$obj = $obj->getIterator();
} elseif ($obj instanceof Grammar) {
$obj = $obj->patterns->getIterator();
}
// Swap the reference at position $i for what it resolved to.
// NOTE(review): if $obj is still an object here, array_splice() casts the replacement to an array -- confirm a single Pattern is handled as intended.
array_splice($currentRules, $i, 1, $obj);
$currentRulesCount = count($currentRules);
// Re-run the loop body on the entry now at position $i
continue;
}
break;
}
}
}
}
/**
 * Runs a regular expression against a line and returns what it captured.
 *
 * The return type is nullable because "no match" is reported as null; with a
 * plain `array` return type that path raises a TypeError.
 *
 * @param string $regex A complete PCRE pattern as accepted by preg_match()
 * @param string $line The text to match against
 * @return array|null The preg_match() capture array (each entry is a
 *     [text, offset] pair because of PREG_OFFSET_CAPTURE), or null when the
 *     pattern does not match or preg_match() fails
 */
protected function getMatch(string $regex, string $line): ?array {
    if (preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE) !== 1) {
        return null;
    }

    return $match;
}
}
Loading…
Cancel
Save