|
|
@ -18,6 +18,9 @@ class Tokenizer { |
|
|
|
protected array $ruleStack; |
|
|
|
protected array $scopeStack; |
|
|
|
|
|
|
|
const MATCH_MODE_SINGLE = 0; |
|
|
|
const MATCH_MODE_BEGINEND = 1; |
|
|
|
|
|
|
|
|
|
|
|
public function __construct(\Generator $data, Grammar $grammar) { |
|
|
|
$this->data = $data; |
|
|
@ -32,7 +35,21 @@ class Tokenizer { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Lazily tokenizes the input, one line at a time.
 *
 * Iterates the line source held in $this->data and hands each line to
 * tokenizeLine(), yielding that method's result under the key the source
 * supplied for the line.
 *
 * @return \Generator Yields the source's key => the array returned by tokenizeLine().
 */
public function tokenize(): \Generator {
    // A single pass only: a superseded loop header using $inputLine used to sit
    // in front of this one, which nested iteration of the same generator and
    // left the method's braces unbalanced.
    foreach ($this->data as $lineNumber => $line) {
        yield $lineNumber => $this->tokenizeLine($line);
    }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a regular expression against a line and returns what it captured.
 *
 * The match is performed with PREG_OFFSET_CAPTURE, so every entry of the
 * returned array is a [matched text, byte offset] pair.
 *
 * @param string $regex Complete PCRE pattern, including delimiters and modifiers.
 * @param string $line  Subject text to search.
 *
 * @return array|null The captured groups, or null when the pattern does not match.
 *
 * @throws \RuntimeException When preg_match() itself fails (returns false).
 */
protected function getMatch(string $regex, string $line): ?array {
    $result = preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE);

    // false is a PCRE failure, not "no match"; reporting it keeps a broken
    // pattern from silently behaving like one that never matches.
    if ($result === false) {
        throw new \RuntimeException(sprintf(
            'preg_match() failed for pattern %s (PCRE error code %d)',
            $regex,
            preg_last_error()
        ));
    }

    return $result === 1 ? $match : null;
}
|
|
|
|
|
|
|
protected function tokenizeLine(string $inputLine): array { |
|
|
|
$currentRules = end($this->ruleStack)->patterns->getIterator(); |
|
|
|
$currentRulesCount = count($currentRules); |
|
|
|
$results = []; |
|
|
@ -42,14 +59,17 @@ class Tokenizer { |
|
|
|
while (true) { |
|
|
|
$rule = $currentRules[$i]; |
|
|
|
if ($rule instanceof Pattern) { |
|
|
|
$matchMode = null; |
|
|
|
$regex = null; |
|
|
|
if ($rule->match !== null) { |
|
|
|
$regex = $rule->match; |
|
|
|
$matchMode = self::MATCH_MODE_SINGLE; |
|
|
|
} elseif ($rule->begin !== null) { |
|
|
|
$regex = $rule->begin; |
|
|
|
$matchMode = self::MATCH_MODE_BEGINEND; |
|
|
|
} |
|
|
|
|
|
|
|
if ($regex !== null && $match = $this->getMatch($regex, $line)) { |
|
|
|
if ($matchMode !== null && $match = $this->getMatch($regex, $line)) { |
|
|
|
$scopeStack = $this->scopeStack; |
|
|
|
if ($rule->name !== null) { |
|
|
|
$scopeStack[] = $rule->name; |
|
|
@ -58,14 +78,9 @@ class Tokenizer { |
|
|
|
$scopeStack[] = $rule->contentName; |
|
|
|
} |
|
|
|
|
|
|
|
$results[] = [ |
|
|
|
'scopeStack' => $scopeStack, |
|
|
|
'matches' => $match |
|
|
|
]; |
|
|
|
|
|
|
|
die(var_export($rule)); |
|
|
|
|
|
|
|
if ($rule->begin !== null) { |
|
|
|
if ($matchMode === self::MATCH_MODE_BEGINEND) { |
|
|
|
$this->ruleStack[] = $rule; |
|
|
|
$this->scopeStack[] = $scopeStack; |
|
|
|
} |
|
|
@ -87,13 +102,3 @@ class Tokenizer { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a regular expression against a line and returns what it captured.
 *
 * The match is performed with PREG_OFFSET_CAPTURE, so every entry of the
 * returned array is a [matched text, byte offset] pair.
 *
 * @param string $regex Complete PCRE pattern, including delimiters and modifiers.
 * @param string $line  Subject text to search.
 *
 * @return array|null The captured groups, or null when the pattern does not match.
 *
 * @throws \RuntimeException When preg_match() itself fails (returns false).
 */
protected function getMatch(string $regex, string $line): ?array {
    $result = preg_match($regex, $line, $match, PREG_OFFSET_CAPTURE);

    // false is a PCRE failure, not "no match"; reporting it keeps a broken
    // pattern from silently behaving like one that never matches.
    if ($result === false) {
        throw new \RuntimeException(sprintf(
            'preg_match() failed for pattern %s (PCRE error code %d)',
            $regex,
            preg_last_error()
        ));
    }

    return $result === 1 ? $match : null;
}
|
|
|
} |