Browse Source

Setting up Tokenizer for recursion

main
Dustin Wilson 3 years ago
parent
commit
bb4b90a7b0
  1. 41
      lib/Tokenizer.php

41
lib/Tokenizer.php

@ -18,6 +18,9 @@ class Tokenizer {
protected array $ruleStack; protected array $ruleStack;
protected array $scopeStack; protected array $scopeStack;
const MATCH_MODE_SINGLE = 0;
const MATCH_MODE_BEGINEND = 1;
public function __construct(\Generator $data, Grammar $grammar) { public function __construct(\Generator $data, Grammar $grammar) {
$this->data = $data; $this->data = $data;
@ -32,7 +35,21 @@ class Tokenizer {
public function tokenize(): \Generator { public function tokenize(): \Generator {
foreach ($this->data as $lineNumber => $inputLine) { foreach ($this->data as $lineNumber => $line) {
yield $lineNumber => $this->tokenizeLine($line);
}
}
/**
 * Runs a single regular expression against a line of input.
 *
 * The pattern is applied with PREG_OFFSET_CAPTURE, so every entry of the
 * returned array is a two-element list: the matched text followed by its
 * byte offset within $line.
 *
 * @param string $regex The delimited PCRE pattern to apply.
 * @param string $line  The text to search.
 *
 * @return array|null The capture array when preg_match() reports exactly
 *                    one match; null for every other outcome (no match,
 *                    or preg_match() returning false on failure).
 */
protected function getMatch(string $regex, string $line): ?array {
    $captures = [];
    $matched = preg_match($regex, $line, $captures, PREG_OFFSET_CAPTURE) === 1;

    return $matched ? $captures : null;
}
protected function tokenizeLine(string $inputLine): array {
$currentRules = end($this->ruleStack)->patterns->getIterator(); $currentRules = end($this->ruleStack)->patterns->getIterator();
$currentRulesCount = count($currentRules); $currentRulesCount = count($currentRules);
$results = []; $results = [];
@ -42,14 +59,17 @@ class Tokenizer {
while (true) { while (true) {
$rule = $currentRules[$i]; $rule = $currentRules[$i];
if ($rule instanceof Pattern) { if ($rule instanceof Pattern) {
$matchMode = null;
$regex = null; $regex = null;
if ($rule->match !== null) { if ($rule->match !== null) {
$regex = $rule->match; $regex = $rule->match;
$matchMode = self::MATCH_MODE_SINGLE;
} elseif ($rule->begin !== null) { } elseif ($rule->begin !== null) {
$regex = $rule->begin; $regex = $rule->begin;
$matchMode = self::MATCH_MODE_BEGINEND;
} }
if ($regex !== null && $match = $this->getMatch($regex, $line)) { if ($matchMode !== null && $match = $this->getMatch($regex, $line)) {
$scopeStack = $this->scopeStack; $scopeStack = $this->scopeStack;
if ($rule->name !== null) { if ($rule->name !== null) {
$scopeStack[] = $rule->name; $scopeStack[] = $rule->name;
@ -58,14 +78,9 @@ class Tokenizer {
$scopeStack[] = $rule->contentName; $scopeStack[] = $rule->contentName;
} }
$results[] = [
'scopeStack' => $scopeStack,
'matches' => $match
];
die(var_export($rule)); die(var_export($rule));
if ($rule->begin !== null) { if ($matchMode === self::MATCH_MODE_BEGINEND) {
$this->ruleStack[] = $rule; $this->ruleStack[] = $rule;
$this->scopeStack[] = $scopeStack; $this->scopeStack[] = $scopeStack;
} }
@ -87,13 +102,3 @@ class Tokenizer {
} }
} }
} }
/**
 * Runs a single regular expression against a line of input.
 *
 * The pattern is applied with PREG_OFFSET_CAPTURE, so every entry of the
 * returned array is a two-element list: the matched text followed by its
 * byte offset within $line.
 *
 * @param string $regex The delimited PCRE pattern to apply.
 * @param string $line  The text to search.
 *
 * @return array|null The capture array when preg_match() reports exactly
 *                    one match; null for every other outcome (no match,
 *                    or preg_match() returning false on failure).
 */
protected function getMatch(string $regex, string $line): ?array {
    $captures = [];
    $matched = preg_match($regex, $line, $captures, PREG_OFFSET_CAPTURE) === 1;

    return $matched ? $captures : null;
}
}
Loading…
Cancel
Save