Browse Source

Removed weak references from grammars

• Originally I had a concept of a read-only node tree for grammars, with nodes owning other nodes, thinking it would be necessary when tokenizing. It isn't, so the weak references are more trouble than they're worth.
• "ownership" in Grammar\Reference objects is handled by an ownerGrammarScopeName property which is then used to get the grammar from the GrammarRegistry.
main
Dustin Wilson 3 years ago
parent
commit
005e394076
  1. 51
      lib/Grammar.php
  2. 24
      lib/Grammar/BaseReference.php
  3. 61
      lib/Grammar/ChildGrammarRegistry.php
  4. 10
      lib/Grammar/Exception.php
  5. 26
      lib/Grammar/FauxReadOnly.php
  6. 28
      lib/Grammar/GrammarReference.php
  7. 33
      lib/Grammar/NamedPatternList.php
  8. 16
      lib/Grammar/Pattern.php
  9. 16
      lib/Grammar/PatternList.php
  10. 11
      lib/Grammar/Reference.php
  11. 26
      lib/Grammar/RepositoryReference.php
  12. 17
      lib/Grammar/Rule.php
  13. 14
      lib/Grammar/SelfReference.php
  14. 3
      lib/GrammarRegistry.php
  15. 17
      lib/Tokenizer.php

51
lib/Grammar.php

@ -7,9 +7,7 @@ declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\{
BaseReference,
ChildGrammarRegistry,
Exception,
FauxReadOnly,
GrammarReference,
Pattern,
Reference,
@ -27,49 +25,17 @@ class Grammar {
protected ?string $_contentName;
protected ?array $_injections;
protected ?string $_name;
protected ?\WeakReference $_ownerGrammar;
protected ?array $_patterns;
protected ?array $_repository;
protected ?string $_scopeName;
public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?array $injections = null, ?array $repository = null, ?Grammar $ownerGrammar = null) {
public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?array $injections = null, ?array $repository = null) {
$this->_name = $name;
$this->_scopeName = $scopeName;
$this->_patterns = $patterns;
$this->_injections = $injections;
$this->_repository = $repository;
$this->_ownerGrammar = (is_null($ownerGrammar)) ? null : \WeakReference::create($ownerGrammar);
}
// Used when adopting to change the $ownerGrammar property.
/**
 * Returns a copy of this grammar that is owned by the supplied grammar.
 *
 * When ChildGrammarRegistry already holds a grammar for this scope name and
 * owner, that instance is returned and no new clone is made. Otherwise this
 * grammar is cloned, every entry of its patterns, injections, and repository
 * is replaced by a copy whose owner is the clone, and the clone is stored in
 * ChildGrammarRegistry before being returned.
 *
 * @param Grammar $ownerGrammar The grammar adopting this one
 */
public function withOwnerGrammar(Grammar $ownerGrammar): self {
    if ($new = ChildGrammarRegistry::get($this->_scopeName, $ownerGrammar)) {
        return $new;
    }

    $new = clone $this;

    // array_map() with a single array preserves keys, so named repository and
    // injection entries keep their names. It also avoids by-reference loop
    // variables, which would stay bound to the last array element after the
    // loop unless explicitly unset.
    $adopt = static fn($rule) => $rule->withOwnerGrammar($new);

    if ($new->_patterns !== null) {
        $new->_patterns = array_map($adopt, $new->_patterns);
    }

    if ($new->_injections !== null) {
        $new->_injections = array_map($adopt, $new->_injections);
    }

    if ($new->_repository !== null) {
        $new->_repository = array_map($adopt, $new->_repository);
    }

    $new->_ownerGrammar = \WeakReference::create($ownerGrammar);
    ChildGrammarRegistry::set($this->_scopeName, $new);
    return $new;
}
@ -105,7 +71,7 @@ class Grammar {
}
$this->_repository = $repository;
$this->_patterns = $this->parseJSONPatternList($json['patterns'], $filename);
$this->_patterns = $this->parseJSONPatternArray($json['patterns'], $filename);
$injections = null;
if (isset($json['injections'])) {
@ -122,18 +88,17 @@ class Grammar {
protected function parseJSONPattern(array $pattern, string $filename): Pattern|Reference|null {
if (isset($pattern['include'])) {
if ($pattern['include'][0] === '#') {
return new RepositoryReference(substr($pattern['include'], 1), $this);
return new RepositoryReference(substr($pattern['include'], 1), $this->_scopeName);
} elseif ($pattern['include'] === '$base') {
return new BaseReference($this);
return new BaseReference($this->_scopeName);
} elseif ($pattern['include'] === '$self') {
return new SelfReference($this);
return new SelfReference($this->_scopeName);
} else {
return new GrammarReference($pattern['include'], $this);
return new GrammarReference($pattern['include'], $this->_scopeName);
}
}
$p = [
'ownerGrammar' => $this,
'name' => null,
'contentName' => null,
'match' => null,
@ -255,7 +220,7 @@ class Grammar {
throw new Exception(Exception::JSON_INVALID_TYPE, 'Array', $key, gettype($value), $filename);
}
$p[$key] = $this->parseJSONPatternList($value, $filename);
$p[$key] = $this->parseJSONPatternArray($value, $filename);
$modified = true;
break;
}
@ -264,7 +229,7 @@ class Grammar {
return ($modified) ? new Pattern(...$p) : null;
}
protected function parseJSONPatternList(array $list, string $filename): ?array {
protected function parseJSONPatternArray(array $list, string $filename): ?array {
$result = [];
foreach ($list as $pattern) {
$p = $this->parseJSONPattern($pattern, $filename);

24
lib/Grammar/BaseReference.php

@ -5,26 +5,10 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
* Acts as a sort of lazy weak reference for a base grammar in a grammar.
* Base references in this implementation are simply used as a type. The
* tokenizer stores the base grammar because it's simply the lowest item on the
* stack and simply uses it when encountering a Base reference.
*/
class BaseReference extends Reference {
protected ?\WeakReference $object;
public function get(): Grammar {
if ($this->object !== null) {
return $this->object->get();
}
$grammar = $this->_ownerGrammar->get();
do {
$result = $grammar;
} while ($grammar = $grammar->ownerGrammar);
$this->object = $result;
return $result->get();
}
}
class BaseReference extends Reference {}

61
lib/Grammar/ChildGrammarRegistry.php

@ -1,61 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/**
* Static storage for child grammars; a map of a scope string and a Grammar
* object and checked against an owner grammar. Exists to prevent multiple clones
* of the same grammar from being created and also to give weak references a
* place in memory to access.
*/
class ChildGrammarRegistry {
    /** Map of scope name => list of Grammar objects registered under that scope. */
    protected static array $storage = [];

    /** Empties the registry; always reports success. */
    public static function clear(): bool {
        self::$storage = [];
        return true;
    }

    /**
     * Looks up the grammar stored under a scope name whose ownerGrammar is the
     * very same object as the supplied owner.
     *
     * @return Grammar|null The matching grammar, or null when the scope is
     *                      unknown or no stored grammar has that owner
     */
    public static function get(string $scopeName, Grammar $ownerGrammar): ?Grammar {
        $candidates = array_key_exists($scopeName, self::$storage) ? self::$storage[$scopeName] : [];

        foreach ($candidates as $candidate) {
            if ($candidate->ownerGrammar === $ownerGrammar) {
                return $candidate;
            }
        }

        return null;
    }

    /**
     * Registers a grammar under a scope name.
     *
     * @return bool False when a grammar with the same ownerGrammar is already
     *              stored for the scope (or an exception is raised while
     *              checking), true when the grammar was stored
     */
    public static function set(string $scopeName, Grammar $grammar): bool {
        try {
            if (array_key_exists($scopeName, self::$storage)) {
                // Only one grammar per owner is kept for any given scope.
                foreach (self::$storage[$scopeName] as $existing) {
                    if ($existing->ownerGrammar === $grammar->ownerGrammar) {
                        return false;
                    }
                }

                self::$storage[$scopeName][] = $grammar;
                return true;
            }

            // First grammar seen for this scope.
            self::$storage[$scopeName] = [ $grammar ];
            return true;
        } catch (\Exception $e) {
            return false;
        }
    }
}

10
lib/Grammar/Exception.php

@ -25,10 +25,6 @@ class Exception extends \Exception {
const JSON_MISSING_PROPERTY = 210;
const JSON_INVALID_TYPE = 211;
const LIST_IMMUTABLE = 300;
const LIST_INVALID_INDEX = 301;
const LIST_INVALID_TYPE = 302;
const GRAMMAR_MISSING = 400;
protected static $messages = [
@ -50,11 +46,7 @@ class Exception extends \Exception {
210 => '"%1$s" does not have the required %2$s property',
211 => '%1$s expected for %2$s, found %3$s in "%4$s"',
300 => '%s is immutable',
301 => 'Invalid %1$s index at offset %2$s',
302 => '%1$s expected for %2$s, found %3$s',
400 => 'A grammar for scope %s does not exist; one may be added using GrammarRegistry::set'
300 => 'A grammar for scope %s does not exist; one may be added using GrammarRegistry::set'
];
public function __construct(int $code, ...$args) {

26
lib/Grammar/FauxReadOnly.php

@ -1,26 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
/**
 * Gives a class read-only access to its underscore-prefixed properties:
 * reading `$obj->foo` from outside returns the value of `$obj->_foo`.
 */
trait FauxReadOnly {
    /**
     * Resolves a read of an inaccessible property to its "_"-prefixed
     * counterpart.
     *
     * `ownerGrammar` is special-cased: the stored value is a weak reference, so
     * the referenced object is returned rather than the reference itself. When
     * no owner has been set (the stored value is null) null is returned.
     *
     * @param string $name Property name as written by the caller, without the
     *                     underscore prefix
     * @return mixed The property's value, or null (after raising an
     *               E_USER_NOTICE) when no such property exists
     */
    public function __get(string $name) {
        $prop = "_$name";

        if (!property_exists($this, $prop)) {
            $trace = debug_backtrace();
            trigger_error("Cannot get undefined property $name in {$trace[0]['file']} on line {$trace[0]['line']}", E_USER_NOTICE);
            return null;
        }

        if ($name === 'ownerGrammar') {
            // The owner is optional; guard against calling get() on null so
            // that callers walking up an ownership chain receive null at the
            // top instead of a fatal error.
            return ($this->_ownerGrammar === null) ? null : $this->_ownerGrammar->get();
        }

        return $this->$prop;
    }
}

28
lib/Grammar/GrammarReference.php

@ -5,38 +5,26 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar,
dW\Lit\GrammarRegistry;
use dW\Lit\{
Grammar,
GrammarRegistry
};
/**
* Acts as a sort of lazy reference for entire grammars in grammars.
* Grammar references act as a placeholder for grammars in rule lists
*/
class GrammarReference extends Reference {
protected ?Grammar $object = null;
protected string $_scopeName;
public function __construct(string $scopeName, Grammar $ownerGrammar) {
public function __construct(string $scopeName, string $ownerGrammarScopeName) {
$this->_scopeName = $scopeName;
parent::__construct($ownerGrammar);
parent::__construct($ownerGrammarScopeName);
}
public function get(): Grammar {
if ($this->object !== null) {
return $this->object;
} elseif ($this->object === false) {
return null;
}
$grammar = GrammarRegistry::get($this->_scopeName);
if ($grammar === null) {
$this->object = false;
return null;
}
$this->object = $grammar->withOwnerGrammar($this->_ownerGrammar->get());
return $this->object;
return GrammarRegistry::get($this->_scopeName);
}
}

33
lib/Grammar/NamedPatternList.php

@ -1,33 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
/** Immutable named pattern list used for repositories and injection lists. */
abstract class NamedPatternList extends ImmutableList {
    use FauxReadOnly;

    /**
     * Validates the supplied map and stores it.
     *
     * Every key must be a string and every value must be a Pattern,
     * PatternList, or Reference.
     *
     * @param array $array Map of name => rule
     * @throws Exception With code LIST_INVALID_TYPE on the first key or value
     *                   that fails validation
     */
    public function __construct(array $array) {
        // PHP has no typed arrays or generics, so each entry is checked by hand.
        foreach ($array as $key => $value) {
            if (!is_string($key)) {
                throw new Exception(Exception::LIST_INVALID_TYPE, 'String', 'supplied array index', gettype($key));
            }

            if ($value instanceof Pattern || $value instanceof PatternList || $value instanceof Reference) {
                continue;
            }

            // Report the class name for objects and the type name otherwise.
            $found = is_object($value) ? get_class($value) : gettype($value);
            $expected = __NAMESPACE__.'\Pattern, '.__NAMESPACE__.'\PatternList, '.__NAMESPACE__.'\Reference';
            throw new Exception(Exception::LIST_INVALID_TYPE, $expected, 'supplied array value', $found);
        }

        $this->storage = $array;
    }
}

16
lib/Grammar/Pattern.php

@ -16,11 +16,10 @@ class Pattern extends Rule {
protected bool $_endPattern = false;
protected ?string $_match;
protected ?string $_name;
protected \WeakReference $_ownerGrammar;
protected ?array $_patterns;
public function __construct(Grammar $ownerGrammar, ?string $name = null, ?string $contentName = null, ?string $match = null, ?array $patterns = null, ?array $captures = null, bool $beginPattern = false, bool $endPattern = false) {
public function __construct(?string $name = null, ?string $contentName = null, ?string $match = null, ?array $patterns = null, ?array $captures = null, bool $beginPattern = false, bool $endPattern = false) {
$this->_beginPattern = $beginPattern;
$this->_name = $name;
$this->_contentName = $contentName;
@ -28,18 +27,5 @@ class Pattern extends Rule {
$this->_patterns = $patterns;
$this->_captures = $captures;
$this->_endPattern = $endPattern;
$this->_ownerGrammar = ($ownerGrammar === null) ? null : \WeakReference::create($ownerGrammar);
}
// Used when adopting to change the $ownerGrammar property.
/**
 * Returns a copy of this pattern, and of every rule in its nested pattern
 * list, owned by the supplied grammar.
 *
 * @param Grammar $ownerGrammar The grammar that the copy will belong to
 */
public function withOwnerGrammar(Grammar $ownerGrammar): self {
    $new = parent::withOwnerGrammar($ownerGrammar);

    if ($new->_patterns !== null) {
        // array_map() preserves the keys of a single input array and avoids a
        // by-reference loop variable that would remain bound to the last
        // element after the loop unless explicitly unset.
        $new->_patterns = array_map(
            static fn($rule) => $rule->withOwnerGrammar($ownerGrammar),
            $new->_patterns
        );
    }

    return $new;
}
}

16
lib/Grammar/PatternList.php

@ -1,16 +0,0 @@
<?php
/** @license MIT
* Copyright 2021 Dustin Wilson et al.
* See LICENSE file for details */
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
/** Immutable list of pattern rules */
class PatternList extends ImmutableList {
/**
 * Accepts any number of rules and forwards them unchanged to the parent
 * constructor. The variadic union type restricts each value to a Pattern, a
 * Reference, or a \WeakReference.
 */
public function __construct(Pattern|Reference|\WeakReference ...$values) {
parent::__construct(...$values);
}
}

11
lib/Grammar/Reference.php

@ -5,7 +5,16 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
/** Acts as a catch-all type for references */
abstract class Reference extends Rule {}
abstract class Reference extends Rule {
use FauxReadOnly;
// Scope name of the grammar that owns this reference. Only the name is stored,
// not the Grammar object itself.
protected string $_ownerGrammarScopeName;
/**
 * @param string $ownerGrammarScopeName Scope name of the owning grammar
 */
public function __construct(string $ownerGrammarScopeName) {
$this->_ownerGrammarScopeName = $ownerGrammarScopeName;
}
}

26
lib/Grammar/RepositoryReference.php

@ -5,37 +5,29 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
use dW\Lit\GrammarRegistry;
/**
* Acts as a sort of lazy reference for repository items in grammars.
* Repository references act as a placeholder for named repository patterns in
* rule lists
*/
class RepositoryReference extends Reference {
protected string $_name;
protected PatternList|Pattern|null|false $object = null;
public function __construct(string $name, Grammar $ownerGrammar) {
public function __construct(string $name, string $ownerGrammarScopeName) {
$this->_name = $name;
parent::__construct($ownerGrammar);
parent::__construct($ownerGrammarScopeName);
}
public function get(): PatternList|Pattern|null {
if ($this->object === false) {
public function get(): ?Pattern {
$grammar = GrammarRegistry::get($this->_ownerGrammarScopeName);
if ($grammar->repository === null) {
return null;
} elseif ($this->object !== null) {
return $this->object;
}
$grammar = $this->_ownerGrammar->get();
if (!isset($grammar->repository[$this->name])) {
$this->object = false;
return null;
}
$this->object = $grammar->repository[$this->name];
return $this->object;
return (isset($grammar->repository[$this->_name])) ? $grammar->repository[$this->_name] : null;
}
}

17
lib/Grammar/Rule.php

@ -5,26 +5,11 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
use dW\Lit\FauxReadOnly;
/**
* Abstract class used as a base class for Pattern and Reference classes
*/
abstract class Rule {
use FauxReadOnly;
protected \WeakReference $_ownerGrammar;
public function __construct(Grammar $ownerGrammar) {
$this->_ownerGrammar = \WeakReference::create($ownerGrammar);
}
// Used when adopting to change the $ownerGrammar property.
public function withOwnerGrammar(Grammar $ownerGrammar): self {
$new = clone $this;
$new->_ownerGrammar = \WeakReference::create($ownerGrammar);
return $new;
}
}

14
lib/Grammar/SelfReference.php

@ -5,16 +5,14 @@
declare(strict_types=1);
namespace dW\Lit\Grammar;
use dW\Lit\Grammar;
use dW\Lit\{
Grammar,
GrammarRegistry
};
/** A weak reference to a grammar's self. */
/** A reference to a grammar's self. */
class SelfReference extends Reference {
public function __construct(Grammar $grammar) {
parent::__construct($grammar);
}
public function get(): Grammar {
return $this->_ownerGrammar->get();
return GrammarRegistry::get($this->_ownerGrammarScopeName);
}
}

3
lib/GrammarRegistry.php

@ -5,7 +5,6 @@
declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\ChildGrammarRegistry;
/** Static storage for grammars; a map of a scope string and a Grammar object */
@ -13,8 +12,6 @@ class GrammarRegistry {
protected static array $storage = [];
public static function clear(): bool {
// Clear all the child grammars, too.
ChildGrammarRegistry::clear();
self::$storage = [];
return true;
}

17
lib/Tokenizer.php

@ -6,6 +6,7 @@
declare(strict_types=1);
namespace dW\Lit;
use dW\Lit\Grammar\{
BaseReference,
Pattern,
Reference,
RepositoryReference
@ -112,11 +113,6 @@ class Tokenizer {
while (true) {
$rule = $currentRules[$i];
if ($this->debug === 19 && $this->debugCount === 3) {
$rule->get();
die(var_export($rule->ownerGrammar));
}
// If the rule is a Pattern
if ($rule instanceof Pattern) {
// Throw out pattern regexes with anchors that should match the current line.
@ -158,9 +154,14 @@ class Tokenizer {
}
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into
// the rule list, and reprocess the rule.
elseif ($rule instanceof Reference && $obj = $rule->get()) {
if ($obj instanceof Grammar || ($rule instanceof RepositoryReference && $obj->match === null)) {
$obj = $obj->patterns;
elseif ($rule instanceof Reference) {
if (!$rule instanceof BaseReference) {
$obj = $rule->get();
if ($obj instanceof Grammar || ($rule instanceof RepositoryReference && $obj->match === null)) {
$obj = $obj->patterns;
}
} else {
$obj = $this->grammar->patterns;
}
array_splice($currentRules, $i, 1, ($obj instanceof Pattern) ? [ $obj ] : $obj);

Loading…
Cancel
Save