Browse Source

Progress

main
Dustin Wilson 3 years ago
parent
commit
088a28270b
  1. 11
      lib/Grammar.php
  2. 6
      lib/Highlight.php
  3. 53
      lib/Tokenizer.php

11
lib/Grammar.php

@ -25,7 +25,6 @@ use dW\Lit\Grammar\{
class Grammar {
use FauxReadOnly;
protected ?string $_contentName;
protected ?string $_firstLineMatch;
protected ?array $_injections;
protected ?string $_name;
protected ?\WeakReference $_ownerGrammar;
@ -34,11 +33,10 @@ class Grammar {
protected ?string $_scopeName;
public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?string $firstLineMatch = null, ?array $injections = null, ?array $repository = null, ?Grammar $ownerGrammar = null) {
public function __construct(?string $scopeName = null, ?array $patterns = null, ?string $name = null, ?array $injections = null, ?array $repository = null, ?Grammar $ownerGrammar = null) {
$this->_name = $name;
$this->_scopeName = $scopeName;
$this->_patterns = $patterns;
$this->_firstLineMatch = $firstLineMatch;
$this->_injections = $injections;
$this->_repository = $repository;
$this->_ownerGrammar = (is_null($ownerGrammar)) ? null : \WeakReference::create($ownerGrammar);
@ -97,13 +95,6 @@ class Grammar {
$this->_name = $json['name'] ?? null;
$this->_scopeName = $json['scopeName'];
if (isset($json['firstLineMatch'])) {
$value = str_replace('/', '\/', $json['firstLineMatch']);
$this->_firstLineMatch = $value;
} else {
$this->_firstLineMatch = null;
}
$repository = null;
if (isset($json['repository'])) {
$respository = [];

6
lib/Highlight.php

@ -28,9 +28,9 @@ class Highlight {
$tokenList = $tokenizer->tokenize();
foreach ($tokenList as $lineNumber => $tokens) {
var_export($tokens);
echo "\n";
if ($lineNumber === 6) {
//var_export($tokens);
//echo "\n";
if ($lineNumber === 7) {
die();
}
}

53
lib/Tokenizer.php

@ -38,6 +38,7 @@ class Tokenizer {
public function tokenize(): \Generator {
foreach ($this->data as $lineNumber => $line) {
$this->debug = $lineNumber;
$this->debugCount = 0;
$this->offset = 0;
$lineLength = strlen($line);
@ -93,30 +94,32 @@ class Tokenizer {
$currentRules = end($this->ruleStack)->patterns;
$currentRulesCount = count($currentRules);
$nextMatch = null;
$closestMatch = null;
// Iterate through the rules to find matches for the line at the current offset.
for ($i = 0; $i < $currentRulesCount; $i++) {
while (true) {
$rule = $currentRules[$i];
if ($this->debug === 6 && $this->debugCount === 12) {
if ($rule instanceof Pattern) {
echo "Match: {$rule->match}\n";
}
}
// If the rule is a Pattern and matches the line at the offset then...
if ($rule instanceof Pattern && preg_match($rule->match, $line, $match, PREG_OFFSET_CAPTURE, $this->offset)) {
$match = [
'match' => $match,
'pattern' => $rule
];
if ($match['match'][0][1] === $this->offset) {
$nextMatch = $match;
// If the match's offset is the same as the current offset then it is the
// closest match. There's no need to iterate anymore through the patterns.
if ($match[0][1] === $this->offset) {
$closestMatch = [
'match' => $match,
'pattern' => $rule
];
break 2;
} elseif ($match['match'][0][1] < $nextMatch['match'][0][1]) {
$nextMatch = $match;
}
// Otherwise, if the closest match is currently null or the match's offset is
// less than the closest match's offset then set the match as the closest match
// and continue looking for a closer one.
elseif ($closestMatch === null || $match[0][1] < $closestMatch['match'][0][1]) {
$closestMatch = [
'match' => $match,
'pattern' => $rule
];
}
}
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into
@ -136,15 +139,20 @@ class Tokenizer {
}
// If there were a match above...
if ($nextMatch !== null) {
$match = $nextMatch['match'];
$pattern = $nextMatch['pattern'];
if ($closestMatch !== null) {
$match = $closestMatch['match'];
$pattern = $closestMatch['pattern'];
if ($this->debug === 7) {
var_export($closestMatch);
echo "\n";
}
// **¡TEMPORARY!** Haven't implemented begin and end line
// anchors, so let's toss patterns with them completely for now.
if (preg_match('/\\\(?:A|G|Z)/', $rule->match)) {
continue;
}
//if (preg_match('/\\\(?:A|G|Z)/', $rule->match)) {
// continue;
//}
// If the subpattern begins after the offset then create a token from the bits
// of the line in-between the last token and the one(s) about to be created.
@ -257,6 +265,7 @@ class Tokenizer {
}
}
// If the offset isn't at the end of the line then look for more matches.
if ($this->offset !== $lineLength) {
continue;
}

Loading…
Cancel
Save