You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
628 lines
30 KiB
628 lines
30 KiB
<?php
|
|
/** @license MIT
|
|
* Copyright 2021 Dustin Wilson et al.
|
|
* See LICENSE file for details */
|
|
|
|
declare(strict_types=1);
|
|
namespace MensBeam\Lit;
|
|
use MensBeam\Lit\Grammar\{
|
|
BaseReference,
|
|
Pattern,
|
|
Reference,
|
|
RepositoryReference
|
|
};
|
|
use MensBeam\Lit\Scope\{
|
|
Filter,
|
|
Parser as ScopeParser
|
|
};
|
|
|
|
|
|
/** Class for tokenizing input data */
|
|
class Tokenizer {
|
|
/**
 * Debug switch. Assertions (`ini_set('zend.assertions', '1')`) must be enabled
 * for any debug output to appear because the debug methods are invoked through
 * assert().
 */
public static bool $debug = false;

/** The input Data object whose lines are tokenized */
protected Data $data;

/** The supplied Grammar used to highlight the input data */
protected Grammar $grammar;

/** The offset/position on the current line the tokenizer is at */
protected int $offset = 0;

/** Flag telling the tokenizer that an injection is on the rule stack */
protected bool $activeInjection = false;

/** The line currently being tokenized */
protected string $line = '';

/** The number of the input line currently being tokenized */
protected int $lineNumber = 1;

/**
 * Keys of the rule list cache: the original rule lists. Rule lists which have
 * had references spliced into them are cached so the same reference does not
 * have to be spliced repeatedly. Two parallel arrays are used because the key
 * is itself an array, which PHP cannot use as an array index.
 */
protected array $ruleCacheIndexes = [];

/** Values of the rule list cache: the spliced rule lists, parallel to $ruleCacheIndexes */
protected array $ruleCacheValues = [];

/** The stack of rules */
protected array $ruleStack;

/** The stack of scopes */
protected array $scopeStack;

/**
 * Every result (including null) returned by findClosestMatch while tokenizing;
 * searched backwards to find the begin match when an end pattern contains a
 * back reference.
 */
protected array $previousMatches = [];

/**
 * Matches capture placeholders in scope names. Group 1 holds the capture index
 * of the `$N` form; groups 2 and 3 hold the capture index and the command of
 * the `${N:/downcase}` / `${N:/upcase}` form.
 */
protected const SCOPE_RESOLVE_REGEX = '/\$(\d+)|\${(\d+):\/(downcase|upcase)}/S';

/** Matches an unescaped \A, \G, \Z, or \z anchor in a rule's regular expression */
protected const ANCHOR_CHECK_REGEX = '/(?<!\\\)\\\([AGZz])/S';

/** Matches a numeric back reference (a backslash followed by digits) in a rule's regular expression */
protected const BACK_REFERENCE_REGEX = '/\\\\(\d+)/S';
|
|
|
|
|
|
/**
 * Creates a tokenizer for the supplied input data and grammar.
 *
 * The rule stack starts out containing only the grammar, and the scope stack
 * starts out containing the grammar's scope name(s).
 *
 * @param Data $data The input data to tokenize
 * @param Grammar $grammar The grammar whose rules are matched against the data
 */
public function __construct(Data $data, Grammar $grammar) {
    $this->data = $data;
    $this->grammar = $grammar;
    $this->ruleStack = [ $grammar ];

    // Grammar scope names can contain spaces (eg: source.js.rails
    // source.js.jquery), meaning more than one needs to be added to the stack. It
    // doesn't look like anything does it elsewhere, so spaces in scope names only
    // work here. Empty pieces are dropped so leading or trailing whitespace in the
    // scope name cannot put an empty scope on the stack.
    $this->scopeStack = preg_split('/\s+/', $grammar->scopeName, -1, PREG_SPLIT_NO_EMPTY);
}
|
|
|
|
/**
 * Receives lines from the Data object and yields, keyed by line number, the
 * array of tokens produced for each line. Every token is an array with a
 * 'scopes' entry (the scope stack at the time the token was created) and a
 * 'text' entry.
 */
public function tokenize(): \Generator {
    foreach ($this->data->get() as $lineNumber => $line) {
        $this->lineNumber = $lineNumber;
        $this->line = $line;

        // An empty final line would only produce an empty token set, so the
        // generator simply ends instead.
        if ($this->data->lastLine && $line === '') {
            return;
        }

        assert($this->debugLine());
        $this->offset = 0;

        $lineLength = strlen($line);
        $tokens = [];
        if ($lineLength > 0) {
            $tokens = $this->tokenizeLine($lineLength);
        }

        if ($this->offset < $lineLength) {
            // Whatever the rules left unconsumed becomes one trailing token; the
            // newline is appended to it on every line except the last.
            $remainder = substr($line, $this->offset, $lineLength - $this->offset);
            if (!$this->data->lastLine) {
                $remainder .= "\n";
            }

            $tokens[] = [
                'scopes' => $this->scopeStack,
                'text' => $remainder
            ];
        } elseif (!$this->data->lastLine) {
            // The whole line was consumed. Some matches grab the newline themselves, so
            // a newline token is only added when the last token doesn't already end
            // with one (or when there are no tokens at all).
            $lastToken = end($tokens);
            if ($lastToken === false || !str_ends_with($lastToken['text'], "\n")) {
                $tokens[] = [
                    'scopes' => $this->scopeStack,
                    'text' => "\n"
                ];
            }
        }

        assert($this->debugTokens($tokens));
        yield $lineNumber => $tokens;
    }
}
|
|
|
|
|
|
/**
 * Resolves capture placeholders in a scope name using the supplied match.
 *
 * `$N` is replaced by the trimmed text of capture N; `${N:/downcase}` and
 * `${N:/upcase}` are replaced by that text converted to lower or upper case.
 * When the match has no capture N the index itself is used as the replacement.
 *
 * @param string $scopeName The scope name which may contain placeholders
 * @param array $match A preg_match result captured with PREG_OFFSET_CAPTURE
 * @return string The scope name with its placeholders resolved
 */
protected function resolveScopeName(string $scopeName, array $match): string {
    return preg_replace_callback(self::SCOPE_RESOLVE_REGEX, static function(array $m) use ($match): string {
        // SCOPE_RESOLVE_REGEX has two alternatives with separately numbered groups:
        // `$N` fills group 1 only, while `${N:/command}` leaves group 1 empty and
        // fills group 2 (the index) and group 3 (the command).
        $hasCommand = ($m[1] === '');
        $index = ($hasCommand) ? $m[2] : $m[1];
        $command = ($hasCommand) ? ($m[3] ?? null) : null;

        $replacement = trim($match[(int)$index][0] ?? $index);

        return match ($command) {
            'downcase' => strtolower($replacement),
            'upcase' => strtoupper($replacement),
            default => $replacement
        };
    }, $scopeName);
}
|
|
|
|
/**
 * Tokenizes the current line, starting at the current offset, until either no
 * rule matches anymore or the offset reaches the stop offset.
 *
 * The method calls itself to process the patterns of captures and of matched
 * rules. The rule stack, the scope stack, the active injection flag and the
 * offset are shared state which is mutated as matches are consumed.
 *
 * @param int $stopOffset Absolute offset on the current line at which to stop looking for further matches
 * @return array List of tokens; each token is an array with a 'scopes' list and a 'text' string
 * @throws \RuntimeException If a capture with patterns of its own begins before the current offset
 */
protected function tokenizeLine(int $stopOffset): array {
    $tokens = [];

    while (true) {
        $closestMatch = $this->findClosestMatch(end($this->ruleStack));
        $this->previousMatches[] = $closestMatch;
        assert($this->debugClosestMatch($closestMatch));

        // Nothing matches on the rest of the line; the caller takes care of any
        // text which is left over.
        if ($closestMatch === null) {
            break;
        }

        $match = $closestMatch['match'];
        $pattern = $closestMatch['pattern'];
        $matchStart = $match[0][1];
        $matchEnd = $matchStart + strlen($match[0][0]);

        // Filler tokens (text before the match and text of the match left
        // unconsumed) must not be created when the pattern is an end pattern whose
        // regex contains a negative lookahead for the offset. This is due to a
        // difference in how PCRE works versus the original Oniguruma.
        $suppressFiller = $pattern->endPattern && preg_match('/\(\?!\\\G\)/', $pattern->match) === 1;

        // If the match begins after the offset then create a token from the bits of
        // the line in-between the last token and the one(s) about to be created.
        if ($matchStart > $this->offset && !$suppressFiller) {
            $tokens[] = [
                'scopes' => $this->scopeStack,
                'text' => substr($this->line, $this->offset, $matchStart - $this->offset)
            ];
            $this->offset = $matchStart;
        }

        // If the pattern is an end pattern and its corresponding begin pattern has a
        // content name remove that from the scope stack before continuing.
        if ($pattern->endPattern) {
            $previousRule = end($this->ruleStack);
            if ($previousRule->beginPattern && $previousRule->contentName !== null) {
                array_pop($this->scopeStack);
            }
        }

        // Add the name to the scope stack if present.
        if ($pattern->name !== null) {
            $this->scopeStack[] = $this->resolveScopeName($pattern->name, $match);
        }

        if ($pattern->captures !== null) {
            // The rule has captures, so tokens are created from the matched
            // subpatterns which have a capture rule.
            foreach ($match as $k => $m) {
                // Skip subpatterns which matched nothing or didn't participate in the
                // match, and those without a capture rule.
                if ($m[0] === '' || $m[1] < 0 || !isset($pattern->captures[$k])) {
                    continue;
                }

                $capture = $pattern->captures[$k];
                $captureEndOffset = $m[1] + strlen($m[0]);

                // If the capture begins after the offset then create a token from the bits of
                // the line in-between the last token and the one(s) about to be created.
                if ($k > 0 && $m[1] > $this->offset) {
                    $tokens[] = [
                        'scopes' => $this->scopeStack,
                        'text' => substr($this->line, $this->offset, $m[1] - $this->offset)
                    ];
                    $this->offset = $m[1];
                }

                // If the capture has a name add it to the scope stack.
                if ($capture->name !== null) {
                    $this->scopeStack[] = $this->resolveScopeName($capture->name, $match);
                }

                if ($capture->patterns !== null) {
                    // The capture has patterns of its own: add the capture to the rule stack,
                    // process the patterns, and then pop the capture off the stack.
                    if ($m[1] < $this->offset) {
                        throw new \RuntimeException(sprintf(
                            'Capture %s of the rule matching %s begins at offset %d, before the current offset %d, on line %d; captures with patterns cannot be processed behind the offset',
                            $k,
                            $pattern->match,
                            $m[1],
                            $this->offset,
                            $this->lineNumber
                        ));
                    }

                    $this->ruleStack[] = $capture;
                    // Don't do injections on capture lists.
                    $activeInjection = $this->activeInjection;
                    $this->activeInjection = true;
                    // Only tokenize the part of the line that contains the capture.
                    $tokens = [ ...$tokens, ...$this->tokenizeLine($captureEndOffset) ];

                    // If the offset is before the end of the capture then create a token from the
                    // bits of the capture from the offset until the end of the capture.
                    if ($captureEndOffset > $this->offset) {
                        $tokens[] = [
                            'scopes' => $this->scopeStack,
                            'text' => substr($this->line, $this->offset, $captureEndOffset - $this->offset)
                        ];
                    }

                    array_pop($this->ruleStack);
                    // Return to the original active injection state.
                    $this->activeInjection = $activeInjection;
                    $this->offset = $captureEndOffset;
                } elseif ($m[1] < $this->offset) {
                    // The capture begins before the current offset, so its token needs to be
                    // spliced within previously emitted ones. Go backwards through the tokens
                    // to find the one the capture is within.
                    $curOffset = $this->offset;
                    for ($i = count($tokens) - 1; $i >= 0; $i--) {
                        $cur = $tokens[$i];
                        $curLength = strlen($cur['text']);
                        $curOffset -= $curLength;
                        if ($m[1] < $curOffset) {
                            continue;
                        }

                        // If the length of the new capture would put part of it outside the previous
                        // token then toss the capture.
                        if ($captureEndOffset > $curOffset + $curLength) {
                            // TODO: trigger a warning or something here maybe?
                            break;
                        }

                        $t = [];

                        // Token for anything before the capture within the token being spliced.
                        $preMatchText = substr($cur['text'], 0, $m[1] - $curOffset);
                        if ($preMatchText !== '') {
                            $t[] = [
                                'scopes' => $cur['scopes'],
                                'text' => $preMatchText
                            ];
                        }

                        // The capture's scope is added to the prior token's scope stack to make the
                        // stack for the new token. Unnamed captures add nothing, and placeholders
                        // in the name are resolved the same way as everywhere else.
                        $scopeStack = $cur['scopes'];
                        if ($capture->name !== null) {
                            $scopeStack[] = $this->resolveScopeName($capture->name, $match);
                        }
                        $t[] = [
                            'scopes' => $scopeStack,
                            'text' => $m[0]
                        ];

                        // Token for anything after the capture within the token being spliced.
                        $postMatchText = substr($cur['text'], $m[1] - $curOffset + strlen($m[0]));
                        if ($postMatchText !== '') {
                            $t[] = [
                                'scopes' => $cur['scopes'],
                                'text' => $postMatchText
                            ];
                        }

                        array_splice($tokens, $i, 1, $t);

                        // Starting at the second to last entry of the match, find the nearest
                        // subpattern which has a valid offset (an invalid one means it matched
                        // nothing) and set the offset to the end of it. Index 0, the entire match,
                        // always has a valid offset.
                        $j = max(0, count($match) - 2);
                        while ($j > 0 && (!isset($match[$j]) || $match[$j][1] < 0)) {
                            $j--;
                        }

                        $this->offset = $match[$j][1] + strlen($match[$j][0]);
                        break;
                    }
                } else {
                    // Otherwise, create a token for the capture.
                    $tokens[] = [
                        'scopes' => $this->scopeStack,
                        'text' => $m[0]
                    ];
                    $this->offset = $captureEndOffset;
                }

                // Pop the capture's name off the scope stack.
                if ($capture->name !== null) {
                    array_pop($this->scopeStack);
                }
            }
        } elseif ($match[0][0] !== '') {
            // The rule doesn't have captures, so a token is created from the entire
            // match, but only if the matched text isn't empty.
            $tokens[] = [
                'scopes' => $this->scopeStack,
                'text' => $match[0][0]
            ];
            $this->offset = $matchEnd;
        }

        // If the pattern is a begin pattern and has a content name then add that to the
        // scope stack before processing the children.
        if ($pattern->beginPattern && $pattern->contentName !== null) {
            $this->scopeStack[] = $this->resolveScopeName($pattern->contentName, $match);
        }

        $this->ruleStack[] = $pattern;
        $this->activeInjection = ($pattern->injection);

        // If the rule has patterns process tokens from its subpatterns.
        if ($pattern->patterns !== null && $this->offset < $stopOffset) {
            // If the pattern has just a regular match (meaning neither a begin nor an end
            // pattern) but has subpatterns then only tokenize the part of the line that's
            // within the match. Stop offsets are absolute positions on the line, so that
            // is the end offset of the match rather than its length. Otherwise, tokenize
            // up to the stop offset. Because of recursion, the stop offset could be set
            // by this step before or within the capture tokenization process.
            $childStopOffset = (!$pattern->beginPattern && !$pattern->endPattern) ? $matchEnd : $stopOffset;
            $tokens = [ ...$tokens, ...$this->tokenizeLine($childStopOffset) ];
        }

        // If the offset is before the end of the match then create a token from the
        // bits of the match from the offset until the end of the match.
        if ($matchEnd > $this->offset && !$suppressFiller) {
            $tokens[] = [
                'scopes' => $this->scopeStack,
                'text' => substr($this->line, $this->offset, $matchEnd - $this->offset)
            ];
            $this->offset = $matchEnd;
        }

        // Begin patterns stay on the stack until their end pattern matches.
        // Everything else is done and comes off now.
        if (!$pattern->beginPattern) {
            if ($pattern->endPattern) {
                // Pop everything off both stacks until a begin pattern is reached...
                while (!end($this->ruleStack)->beginPattern) {
                    $this->popRule();
                }
            }

            // ... and then pop the begin pattern itself, or just the matched rule when it
            // isn't an end pattern.
            $this->popRule();
        }

        // If the offset isn't at the stop offset yet then look for more matches.
        if ($this->offset >= $stopOffset) {
            break;
        }
    }

    return $tokens;
}

/**
 * Pops the top rule off the rule stack, removes its name from the scope stack
 * if it has one, and clears the active injection flag if the rule is an
 * injection.
 */
private function popRule(): void {
    $popped = array_pop($this->ruleStack);

    if ($popped->name !== null) {
        array_pop($this->scopeStack);
    }

    if ($popped->injection) {
        $this->activeInjection = false;
    }
}
|
|
|
|
/**
 * Finds the rule among the supplied rule's patterns whose match is closest to
 * the current offset on the current line.
 *
 * References in the rule list are replaced by what they reference as they are
 * encountered, and the resulting rule list is cached. A match which begins
 * exactly at the current offset ends the search immediately; otherwise the
 * earliest match wins, with the first rule winning ties.
 *
 * @param Grammar|Pattern $pattern The rule whose patterns are to be matched
 * @return ?array An array with 'match' (the preg_match result, captured with PREG_OFFSET_CAPTURE) and 'pattern' (the Pattern which matched), or null if nothing matched
 */
protected function findClosestMatch(Grammar|Pattern $pattern): ?array {
    $injected = false;

    // Grab the current rule list from the cache if available to prevent having to
    // splice in references repeatedly.
    // NOTE(review): this lookup compares the rule lists loosely, and a cache hit
    // skips the injection step below entirely; confirm both are intended.
    $cacheIndex = array_search($pattern->patterns, $this->ruleCacheIndexes);
    if ($cacheIndex !== false) {
        $currentRules = $this->ruleCacheValues[$cacheIndex];
    } else {
        $currentRules = $pattern->patterns ?? [];

        if (!$this->activeInjection && $this->grammar->injections !== null) {
            foreach ($this->grammar->injections as $selector => $injection) {
                $selector = ScopeParser::parseSelector($selector);
                if (!$selector->matches($this->scopeStack)) {
                    continue;
                }

                // Only the first injection whose selector matches is used.
                $prefix = $selector->getPrefix($this->scopeStack);
                if ($prefix === Filter::PREFIX_LEFT || $prefix === Filter::PREFIX_BOTH) {
                    $currentRules = [ ...$injection->patterns, ...$currentRules ];
                    // NOTE(review): leaving here keeps $injected false, so left-prefixed
                    // injection patterns are not stripped when the rule list is cached below;
                    // confirm this is intended.
                    if ($prefix === Filter::PREFIX_LEFT) {
                        break;
                    }
                }
                if ($prefix === null || $prefix === Filter::PREFIX_RIGHT || $prefix === Filter::PREFIX_BOTH) {
                    $currentRules = [ ...$currentRules, ...$injection->patterns ];
                }

                $injected = true;
                break;
            }
        }
    }

    // Every rule is matched against the current line, plus its newline unless
    // it is the last line.
    $subject = $this->line . ((!$this->data->lastLine) ? "\n" : '');

    $closestMatch = null;
    for ($i = 0, $currentRulesCount = count($currentRules); $i < $currentRulesCount; $i++) {
        // The inner loop exists so the rule at the current index can be reprocessed
        // after a reference has been replaced by what it references.
        while (true) {
            // Grammar references can return false if the grammar does not exist, and
            // splicing in an empty rule list can leave nothing at this index; continue
            // on in both cases.
            $rule = $currentRules[$i] ?? false;
            if ($rule === false) {
                continue 2;
            }

            if ($rule instanceof Pattern) {
                $ruleMatch = $rule->match;

                // If the rule is an end pattern with a back reference then it expects to be
                // able to reference a subpattern from its begin pattern. Replace the reference
                // with the matched subpattern and then match with it below.
                if ($rule->endPattern && preg_match(self::BACK_REFERENCE_REGEX, $ruleMatch) === 1) {
                    // Search the previous matches, most recent first, for the begin pattern
                    // which owns this end pattern. This loop needs its own index variable; the
                    // index of the rule loop must be left alone.
                    $beginMatch = null;
                    for ($j = count($this->previousMatches) - 1; $j >= 0; $j--) {
                        $previous = $this->previousMatches[$j];
                        if ($previous === null || !$previous['pattern']->beginPattern) {
                            continue;
                        }

                        foreach ($previous['pattern']->patterns ?? [] as $p) {
                            if ($p === $rule) {
                                $beginMatch = $previous['match'];
                                break 2;
                            }
                        }
                    }

                    if ($beginMatch !== null) {
                        // The matched text is inserted literally, so it is quoted to keep any
                        // regex metacharacters (or the delimiter) within it from altering the
                        // end pattern. References to subpatterns the begin match doesn't have
                        // are left as they are.
                        $delimiter = $ruleMatch[0];
                        $ruleMatch = preg_replace_callback(self::BACK_REFERENCE_REGEX, static function(array $m) use ($beginMatch, $delimiter): string {
                            $text = $beginMatch[(int)$m[1]][0] ?? null;
                            return ($text !== null) ? preg_quote($text, $delimiter) : $m[0];
                        }, $ruleMatch);
                    }
                }

                if (preg_match($ruleMatch, $subject, $match, PREG_OFFSET_CAPTURE, $this->offset) === 1) {
                    // Throw out pattern regexes with anchors that shouldn't match the current line.
                    // This is necessary because the tokenizer is fed data line by line and
                    // therefore anchors that match the beginning of the document and the end won't
                    // do anything.
                    if (preg_match(self::ANCHOR_CHECK_REGEX, $ruleMatch, $anchor) === 1 && (
                        // \A anchors match the beginning of the whole string, not just this line
                        ($anchor[1] === 'A' && !$this->data->firstLine) ||
                        // \z anchors match the end of the whole string, not just this line
                        ($anchor[1] === 'z' && !$this->data->lastLine) ||
                        // \Z anchors match the end of the whole string or before the final newline if
                        // there's a trailing newline in the string
                        ($anchor[1] === 'Z' && !$this->data->lastLineBeforeFinalNewLine)
                    )) {
                        continue 2;
                    }

                    // If there is an existing match, the current matched rule is a begin pattern
                    // that doesn't capture anything, its end pattern would be the next match, and
                    // its end pattern also doesn't capture anything then discard this match. If
                    // this wasn't here it would continuously match this begin pattern followed by
                    // its end pattern, creating an infinite loop because the offset never moves
                    // forward.
                    if ($closestMatch !== null && $rule->beginPattern && $match[0][0] === '') {
                        $next = $this->findClosestMatch($rule);
                        if ($next !== null && $next['pattern']->endPattern && $next['match'][0][0] === '') {
                            continue 2;
                        }
                    }

                    // If the match's offset is the same as the current offset then it is the
                    // closest match. There's no need to iterate anymore through the patterns.
                    if ($match[0][1] === $this->offset) {
                        $closestMatch = [
                            'match' => $match,
                            'pattern' => $rule
                        ];
                        break 2;
                    }

                    // Otherwise, if there is no closest match yet or this match begins before
                    // it then this match becomes the closest match, and the search for a closer
                    // one continues.
                    if ($closestMatch === null || $match[0][1] < $closestMatch['match'][0][1]) {
                        $closestMatch = [
                            'match' => $match,
                            'pattern' => $rule
                        ];
                    }
                }
            } elseif ($rule instanceof Reference) {
                // The rule is a Reference: retrieve what it references, splice that into the
                // rule list in place of the reference, and reprocess the current index.
                if (!$rule instanceof BaseReference) {
                    $obj = $rule->get();
                    if ($obj instanceof Grammar || ($rule instanceof RepositoryReference && $obj->match === null)) {
                        $obj = $obj->patterns;
                    }
                } else {
                    $obj = $this->grammar->patterns;
                }

                array_splice($currentRules, $i, 1, ($obj instanceof Pattern) ? [ $obj ] : $obj);
                $currentRulesCount = count($currentRules);

                // When the current rule list changes write it to the cache. The cache is
                // keyed by the rule list this method was asked to match, which is not
                // necessarily the one on top of the rule stack because this method also
                // calls itself with rules which aren't on the stack.
                if ($cacheIndex === false) {
                    $this->ruleCacheIndexes[] = $pattern->patterns;
                    $cacheIndex = count($this->ruleCacheIndexes) - 1;
                }

                if ($injected) {
                    // Injections need to be re-evaluated against the scope stack every time they're
                    // injected so don't cache them.
                    $this->ruleCacheValues[$cacheIndex] = array_values(array_filter(
                        $currentRules,
                        static fn($r): bool => !($r instanceof Pattern && $r->injection)
                    ));
                } else {
                    $this->ruleCacheValues[$cacheIndex] = $currentRules;
                }

                continue;
            }

            break;
        }
    }

    return $closestMatch;
}
|
|
|
|
|
|
/**
 * Outputs details about the supplied closest match when debugging is enabled.
 * Always returns true so the call can be wrapped in assert().
 */
private function debugClosestMatch(?array $closestMatch): bool {
    if (!self::$debug) {
        return true;
    }

    $format = <<<DEBUG
    Offset: %s
    Regex: %s
    Scope: %s
    HasCaptures: %s
    BeginPattern: %s
    EndPattern: %s
    Match: %s
    DEBUG;

    // There is no pattern to describe when nothing matched.
    $matchedPattern = $closestMatch['pattern'] ?? null;
    $details = [
        $this->offset,
        $matchedPattern?->match ?? 'NULL',
        $matchedPattern?->name ?? 'NULL',
        ($matchedPattern !== null && $matchedPattern->captures !== null) ? 'yes' : 'no',
        var_export($matchedPattern?->beginPattern ?? null, true),
        var_export($matchedPattern?->endPattern ?? null, true),
        var_export($closestMatch['match'] ?? null, true)
    ];

    echo $this->debug_indentLines(sprintf($format, ...$details)) . "\n\n";

    return true;
}
|
|
|
|
/**
 * Prefixes every line of the message with one '|' per tokenizeLine call found
 * in the backtrace beyond the first, followed by a space. The message is
 * returned untouched when there is at most one such call.
 */
private function debug_indentLines(string $message): string {
    // The first two frames of the backtrace are dropped before counting.
    $frames = array_slice(debug_backtrace(), 2);
    $tokenizeLineFrames = array_filter($frames, static fn(array $frame): bool => $frame['function'] === 'tokenizeLine');
    $depth = count($tokenizeLineFrames) - 1;

    if ($depth <= 0) {
        return $message;
    }

    return preg_replace('/^/m', str_repeat('|', $depth) . ' ', $message);
}
|
|
|
|
/**
 * Outputs a header containing the current line number followed by the exported
 * current line when debugging is enabled. Always returns true so the call can
 * be wrapped in assert().
 */
private function debugLine(): bool {
    if (!self::$debug) {
        return true;
    }

    $format = <<<DEBUG
    %s
    Line: %s


    DEBUG;

    $header = str_pad("{$this->lineNumber} ", 80, '-');
    // var_export doubles backslashes; undo that so the line reads as it was input.
    $exportedLine = preg_replace('/\\\\{2}/', '\\', var_export($this->line, true));
    printf($format, $header, $exportedLine);

    return true;
}
|
|
|
|
/**
 * Outputs the exported token list when debugging is enabled. Always returns
 * true so the call can be wrapped in assert().
 */
public function debugTokens(array $tokens): bool {
    if (!self::$debug) {
        return true;
    }

    // var_export doubles backslashes; undo that so the text reads as it was input.
    $exportedTokens = preg_replace('/\\\\{2}/', '\\', var_export($tokens, true));
    echo 'Tokens: ' . $exportedTokens . "\n\n";

    return true;
}
|
|
}
|