Browse Source

Cache spliced rule lists in tokenizer; much faster now

main
Dustin Wilson 3 years ago
parent
commit
15c3576977
  1. 8
      lib/Data.php
  2. 30
      lib/Tokenizer.php

8
lib/Data.php

@ -12,7 +12,7 @@ class Data {
// True if on the first line
protected bool $_firstLine = true;
// The stored generator
protected \Generator $_generator;
protected \Generator $generator;
// True if on the last line.
protected bool $_lastLine = false;
// Some matches will check for the last line before the final newline, so this
@ -28,7 +28,11 @@ class Data {
public function __construct(string $data) {
$this->lines = explode("\n", $data);
$this->linesLength = count($this->lines);
$this->_generator = $this->lineGenerator();
$this->generator = $this->lineGenerator();
}
public function get(): \Generator {
return $this->generator;
}

30
lib/Tokenizer.php

@ -21,12 +21,17 @@ class Tokenizer {
public static bool $debug = false;
protected Data $data;
protected int $debugCount = 0;
protected Grammar $grammar;
protected int $offset = 0;
protected ?Pattern $activeInjection = null;
protected string $line = '';
protected int $lineNumber = 1;
// Cache of rule lists which have had references spliced to keep from having to
// repeatedly splice in the same reference. It needs to be in two arrays because
// PHP doesn't have a functioning Map object; the index needs to be an array
// itself.
protected array $ruleCacheIndexes = [];
protected array $ruleCacheValues = [];
protected array $ruleStack;
protected array $scopeStack;
@ -43,9 +48,16 @@ class Tokenizer {
public function tokenize(): \Generator {
foreach ($this->data->generator as $lineNumber => $line) {
foreach ($this->data->get() as $lineNumber => $line) {
$this->lineNumber = $lineNumber;
$this->line = $line;
// Because of how this tokenizes if the final line is just a new line it will
// yield an empty token set; just end the generator instead.
if ($this->data->lastLine && $line === '') {
return;
}
assert($this->debugLine());
$this->offset = 0;
@ -106,7 +118,11 @@ class Tokenizer {
}
}
$currentRules = end($this->ruleStack)->patterns;
// Grab the current rule list from the cache if available to prevent having to
// splice in references repeatedly.
$cacheIndex = array_search(end($this->ruleStack)->patterns, $this->ruleCacheIndexes);
$currentRules = ($cacheIndex !== false) ? $this->ruleCacheValues[$cacheIndex] : end($this->ruleStack)->patterns;
$currentRulesCount = count($currentRules);
$closestMatch = null;
@ -170,6 +186,14 @@ class Tokenizer {
array_splice($currentRules, $i, 1, ($obj instanceof Pattern) ? [ $obj ] : $obj);
$currentRulesCount = count($currentRules);
// When the current rule list changes write it to the cache.
if ($cacheIndex === false) {
$this->ruleCacheIndexes[] = end($this->ruleStack)->patterns;
$cacheIndex = count($this->ruleCacheIndexes) - 1;
}
$this->ruleCacheValues[$cacheIndex] = $currentRules;
continue;
}

Loading…
Cancel
Save