Browse Source

Trying to start code tokenization

main
Dustin Wilson 3 years ago
parent
commit
33e411ec63
  1. 6
      lib/Grammar.php
  2. 39
      lib/Grammar/GrammarInclude.php
  3. 6
      lib/Grammar/GrammarReference.php
  4. 2
      lib/Grammar/ImmutableList.php
  5. 6
      lib/Grammar/Pattern.php
  6. 2
      lib/Grammar/Reference.php
  7. 6
      lib/Grammar/RepositoryReference.php
  8. 16
      lib/Grammar/Rule.php
  9. 9
      lib/Tokenizer.php

6
lib/Grammar.php

@ -34,9 +34,10 @@ class Grammar {
protected ?PatternList $_patterns;
protected ?Repository $_repository;
protected ?string $_scopeName;
protected ?string $_contentScopeName;
public function __construct(?string $scopeName = null, ?PatternList $patterns = null, ?string $name = null, ?string $contentRegex = null, ?string $firstLineMatch = null, ?InjectionList $injections = null, ?Repository $repository = null, ?Grammar $ownerGrammar = null) {
public function __construct(?string $scopeName = null, ?string $contentScopeName = null, ?PatternList $patterns = null, ?string $name = null, ?string $contentRegex = null, ?string $firstLineMatch = null, ?InjectionList $injections = null, ?Repository $repository = null, ?Grammar $ownerGrammar = null) {
$this->_name = $name;
$this->_scopeName = $scopeName;
$this->_patterns = $patterns;
@ -50,7 +51,7 @@ class Grammar {
/**
 * Clones the supplied grammar with this grammar set as its owner grammar.
 *
 * The arguments are passed in the exact order the constructor declares them
 * (scopeName, contentScopeName, patterns, name, contentRegex, firstLineMatch,
 * injections, repository, ownerGrammar). Passing them in any other order
 * either raises a TypeError under strict_types or silently stores values in
 * the wrong property.
 *
 * @param self $grammar The grammar whose properties are copied
 * @return self A new grammar carrying $grammar's properties, owned by $this
 */
public function adoptGrammar(self $grammar): self {
    return new self(
        $grammar->scopeName,
        $grammar->contentScopeName,
        $grammar->patterns,
        $grammar->name,
        $grammar->contentRegex,
        $grammar->firstLineMatch,
        $grammar->injections,
        $grammar->repository,
        $this
    );
}
@ -75,6 +76,7 @@ class Grammar {
$this->_name = $json['name'] ?? null;
$this->_scopeName = $json['scopeName'];
$this->_contentScopeName = $json['contentScopeName'] ?? null;
$this->_contentRegex = (isset($json['contentRegex'])) ? "/{$json['contentRegex']}/" : null;
$this->_firstLineMatch = (isset($json['firstLineMatch'])) ? "/{$json['firstLineMatch']}/" : null;

39
lib/Grammar/GrammarInclude.php

@ -1,39 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
/**
 * Parsed form of an include string found in a grammar.
 *
 * This allows for referencing a different language, recursively referencing the
 * grammar itself, or a rule declared in the file's repository. The constructor
 * classifies the string as follows:
 *
 *  - "#name"       => REPOSITORY_TYPE; the name is the string without the "#"
 *  - "$base"       => BASE_TYPE; no name
 *  - "$self"       => SELF_TYPE; no name
 *  - anything else => SCOPE_TYPE; the name is the whole string
 */
class GrammarInclude {
    use FauxReadOnly;

    const BASE_TYPE = 0;
    const REPOSITORY_TYPE = 1;
    const SCOPE_TYPE = 2;
    const SELF_TYPE = 3;

    /**
     * Name of the include target. Defaults to null so the property is always
     * initialized: the $base and $self branches never assign it, and reading
     * an uninitialized typed property throws an Error.
     */
    protected ?string $_name = null;

    /** One of the *_TYPE constants of this class */
    protected int $_type;

    /**
     * @param string $string The raw include string taken from the grammar
     */
    public function __construct(string $string) {
        // The null-coalescing read avoids an "Uninitialized string offset"
        // warning when $string is empty; an empty string falls through to
        // SCOPE_TYPE exactly as before.
        if (($string[0] ?? null) === '#') {
            $this->_type = self::REPOSITORY_TYPE;
            $this->_name = substr($string, 1);
        } elseif ($string === '$base') {
            $this->_type = self::BASE_TYPE;
        } elseif ($string === '$self') {
            $this->_type = self::SELF_TYPE;
        } else {
            $this->_type = self::SCOPE_TYPE;
            $this->_name = $string;
        }
    }
}

6
lib/Grammar/GrammarReference.php

@ -5,15 +5,13 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly,
dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
* Acts as a sort of lazy reference for entire grammars in grammars.
*/
class GrammarReference extends Reference {
use FauxReadOnly;
protected ?Grammar $object;
protected \WeakReference $ownerGrammar;
protected string $_scopeName;

2
lib/Grammar/ImmutableList.php

@ -44,7 +44,7 @@ abstract class ImmutableList implements \ArrayAccess, \Countable, \Iterator {
if (!isset($this->storage[$offset])) {
throw new Exception(Exception::LIST_INVALID_INDEX, __CLASS__, $offset);
}
return $this->storage[$offset];
}

6
lib/Grammar/Pattern.php

@ -5,13 +5,11 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
use dW\Lit\Grammar;
/** Contains patterns responsible for matching a portion of the document */
class Pattern {
use FauxReadOnly;
/** Contains patterns responsible for matching a portion of the document */
class Pattern extends Rule {
protected bool $_applyEndPatternLast = false;
protected ?string $_begin;
protected ?CaptureList $_beginCaptures;

2
lib/Grammar/Reference.php

@ -9,4 +9,4 @@ namespace dW\Lit\Grammar;
/**
* Abstract base class for the lazy reference types used in grammars. Both
* GrammarReference and RepositoryReference extend it, so a value can be
* recognized as "some kind of reference" through this one type.
*/
abstract class Reference {}
abstract class Reference extends Rule {}

6
lib/Grammar/RepositoryReference.php

@ -5,15 +5,13 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly,
dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
* Acts as a sort of lazy reference for repository items in grammars.
*/
class RepositoryReference extends Reference {
use FauxReadOnly;
protected \WeakReference $grammar;
protected string $_name;
protected PatternList|Pattern|null|false $object;

16
lib/Grammar/Rule.php

@ -0,0 +1,16 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
/**
 * Common abstract parent of the Pattern and Reference classes.
 *
 * It declares no members of its own; it only pulls in the FauxReadOnly trait
 * so that every subclass receives it through inheritance.
 */
abstract class Rule
{
    use FauxReadOnly;
}

9
lib/Tokenizer.php

@ -6,20 +6,25 @@
declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Scope\Parser as ScopeParser,
dW\Lit\Grammar\Pattern,
dW\Lit\Grammar\RepositoryReference;
class Tokenizer {
protected \Generator $data;
protected Grammar $grammar;
protected array $ruleStack;
public function __construct(\Generator $data, Grammar $grammar) {
$this->data = $data;
$this->grammar = $grammar;
$this->ruleStack = [ $this->grammar ];
}
public function tokenize(): \Generator {
$ruleStack = [ $this->grammar ];
foreach ($this->data as $lineNumber => $line) {
yield $lineNumber => $line;
}

Loading…
Cancel
Save