Browse Source

Started adding assertions for easier debugging of Tokenizer

main
Dustin Wilson 3 years ago
parent
commit
98bc4ff794
  1. 4
      lib/Highlight.php
  2. 88
      lib/Scope/Parser.php
  3. 73
      lib/Tokenizer.php

4
lib/Highlight.php

@ -25,8 +25,8 @@ class Highlight {
foreach ($tokenList as $lineNumber => $tokens) { foreach ($tokenList as $lineNumber => $tokens) {
if ($lineNumber === 26) { if ($lineNumber === 26) {
var_export($tokens); //var_export($tokens);
echo "\n"; //echo "\n";
die(); die();
} }
} }

88
lib/Scope/Parser.php

@ -8,6 +8,11 @@ namespace dW\Lit\Scope;
/** Parses strings into a scope selector */ /** Parses strings into a scope selector */
class Parser { class Parser {
// Flag for turning on debugging on the class. Assertions must be enabled for
// it to work. This allows for parsing of scopes to be debugged separately from
// tokenization of input text.
public static bool $debug = false;
// Used to cache parsed scopes and selectors // Used to cache parsed scopes and selectors
protected static array $cache = [ protected static array $cache = [
'selector' => [], 'selector' => [],
@ -71,7 +76,7 @@ class Parser {
protected static function parseComposite(): Composite { protected static function parseComposite(): Composite {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$expressions = [ self::parseExpression() ]; $expressions = [ self::parseExpression() ];
@ -97,7 +102,7 @@ class Parser {
} }
protected static function parseExpression(int $operator = Expression::OPERATOR_NONE): Expression { protected static function parseExpression(int $operator = Expression::OPERATOR_NONE): Expression {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$peek = self::$instance->data->peek(); $peek = self::$instance->data->peek();
$negate = false; $negate = false;
@ -121,7 +126,7 @@ class Parser {
} }
protected static function parseFilter(string $prefix): Filter { protected static function parseFilter(string $prefix): Filter {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$peek = self::$instance->data->peek(); $peek = self::$instance->data->peek();
if ($peek === '(') { if ($peek === '(') {
@ -136,7 +141,7 @@ class Parser {
} }
protected static function parseGroup(): Group { protected static function parseGroup(): Group {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$token = self::$instance->data->consume(); $token = self::$instance->data->consume();
if ($token !== '(') { if ($token !== '(') {
@ -156,7 +161,7 @@ class Parser {
} }
protected static function parsePath(): Path { protected static function parsePath(): Path {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$anchorStart = false; $anchorStart = false;
if (self::$instance->data->peek() === '^') { if (self::$instance->data->peek() === '^') {
@ -204,7 +209,7 @@ class Parser {
} }
protected static function _parseSelector(): Selector { protected static function _parseSelector(): Selector {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$composites = [ self::parseComposite() ]; $composites = [ self::parseComposite() ];
$peek = self::$instance->data->peek(); $peek = self::$instance->data->peek();
@ -220,7 +225,7 @@ class Parser {
} }
protected static function _parseScope(?Scope $parent = null, bool $anchorToPrevious = false): Scope { protected static function _parseScope(?Scope $parent = null, bool $anchorToPrevious = false): Scope {
assert((fn() => self::debug())()); assert((fn() => self::debugStart())());
$atoms = []; $atoms = [];
$first = true; $first = true;
@ -244,43 +249,48 @@ class Parser {
return $result; return $result;
} }
protected static function debug(): bool {
$message = <<<DEBUG
------------------------------
%s
Method: %s
Position: %s
Token: %s
DEBUG;
$methodTree = '';
$backtrace = debug_backtrace();
// Shift two off because it's executed in an assert closure
array_shift($backtrace);
array_shift($backtrace);
// And, pop this method off the backtrace
array_pop($backtrace);
foreach ($backtrace as $b) {
$methodTree = "->{$b['function']}$methodTree";
}
printf($message, private static function debugStart(): bool {
self::$instance->debugCount++, if (self::$debug) {
ltrim($methodTree, '->'), $message = <<<DEBUG
self::$instance->data->position + 1, ------------------------------
var_export(self::$instance->data->peek(), true) %s
); Method: %s
Position: %s
Token: %s
DEBUG;
$methodTree = '';
$backtrace = debug_backtrace();
// Shift two off because it's executed in an assert closure
array_shift($backtrace);
array_shift($backtrace);
// And, pop this method off the backtrace
array_pop($backtrace);
foreach ($backtrace as $b) {
$methodTree = "->{$b['function']}$methodTree";
}
printf($message,
self::$instance->debugCount++,
ltrim($methodTree, '->'),
self::$instance->data->position + 1,
var_export(self::$instance->data->peek(), true)
);
}
return true; return true;
} }
protected static function debugResult($result): bool { private static function debugResult($result): bool {
printf("%s Result: %s\n", if (self::$debug) {
debug_backtrace()[2]['function'], printf("%s Result: %s\n",
// Removes bullshit from var_exported classes for easier reading debug_backtrace()[2]['function'],
str_replace([ '::__set_state(array', __NAMESPACE__.'\\', '))' ], [ '', '', ')' ], var_export($result, true)) // Removes bullshit from var_exported classes for easier reading
); str_replace([ '::__set_state(array', __NAMESPACE__.'\\', '))' ], [ '', '', ')' ], var_export($result, true))
);
}
return true; return true;
} }

73
lib/Tokenizer.php

@ -18,6 +18,8 @@ use dW\Lit\Scope\{
class Tokenizer { class Tokenizer {
public static bool $debug = false;
protected Data $data; protected Data $data;
protected Grammar $grammar; protected Grammar $grammar;
protected int $offset = 0; protected int $offset = 0;
@ -26,7 +28,7 @@ class Tokenizer {
protected array $scopeStack; protected array $scopeStack;
protected const SCOPE_RESOLVE_REGEX = '/\$(\d+)|\${(\d+):\/(downcase|upcase)}/S'; protected const SCOPE_RESOLVE_REGEX = '/\$(\d+)|\${(\d+):\/(downcase|upcase)}/S';
protected const ANCHOR_CHECK_REGEX = '/(?<!\\\)\\\([AGzZ])/S'; protected const ANCHOR_CHECK_REGEX = '/(?<!\\\)\\\([AGZz])/S';
public function __construct(Data $data, Grammar $grammar) { public function __construct(Data $data, Grammar $grammar) {
@ -39,6 +41,8 @@ class Tokenizer {
public function tokenize(): \Generator { public function tokenize(): \Generator {
foreach ($this->data->get() as $lineNumber => $line) { foreach ($this->data->get() as $lineNumber => $line) {
assert($this->debugLine($lineNumber, $line));
$this->offset = 0; $this->offset = 0;
$lineLength = strlen($line); $lineLength = strlen($line);
@ -60,6 +64,8 @@ class Tokenizer {
]; ];
} }
assert($this->debugTokens($tokens));
yield $lineNumber => $tokens; yield $lineNumber => $tokens;
} }
} }
@ -172,6 +178,8 @@ class Tokenizer {
} }
} }
assert($this->debugClosestMatch($closestMatch));
// If there were a match above... // If there were a match above...
if ($closestMatch !== null) { if ($closestMatch !== null) {
$match = $closestMatch['match']; $match = $closestMatch['match'];
@ -414,4 +422,67 @@ class Tokenizer {
return $tokens; return $tokens;
} }
/**
 * Debug helper intended to be wrapped in assert(). When Tokenizer::$debug is
 * on it echoes a summary of the closest match: the pattern's match, name,
 * beginPattern and endPattern properties plus the raw match data.
 *
 * @param array|null $closestMatch The match to report (its 'pattern' and 'match' keys are read), or null
 * @return bool Always true so the enclosing assertion never fails
 */
private function debugClosestMatch(?array $closestMatch): bool {
    if (!self::$debug) {
        return true;
    }

    // $closestMatch may be null and any of the keys/properties may be absent,
    // so every lookup below falls back through ??
    $pattern = $closestMatch['pattern'] ?? null;
    $format = implode("\n", [
        'Regex: %s',
        'Scope: %s',
        'BeginPattern: %s',
        'EndPattern: %s',
        'Match: %s',
    ]);
    $report = sprintf(
        $format,
        $pattern->match ?? 'NULL',
        $pattern->name ?? 'NULL',
        var_export($pattern->beginPattern ?? null, true),
        var_export($pattern->endPattern ?? null, true),
        var_export($closestMatch['match'] ?? null, true)
    );

    // Indentation is derived from the call stack inside debug_indentLines
    echo $this->debug_indentLines($report) . "\n\n";

    return true;
}
/**
 * Prefixes every line of a debug message with one "|" per tokenizeLine frame
 * on the call stack beyond the first, followed by a space. With zero or one
 * tokenizeLine frame the message is returned untouched.
 *
 * @param string $message The (possibly multi-line) debug text
 * @return string The message, indented according to tokenizeLine nesting
 */
private function debug_indentLines(string $message): string {
    // Only the frame names are inspected, so skip collecting each frame's
    // object and argument list; this runs on every debug call.
    $frames = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);

    // Ignore the two innermost frames: this method and whatever called it
    $frames = array_slice($frames, 2);

    // Starts at -1 so that the outermost tokenizeLine call adds no prefix
    $nesting = -1;
    foreach ($frames as $frame) {
        if ($frame['function'] === 'tokenizeLine') {
            $nesting++;
        }
    }

    if ($nesting <= 0) {
        return $message;
    }

    // '/^/m' matches at the start of every line, so each line gets the prefix
    return preg_replace('/^/m', str_repeat('|', $nesting) . ' ', $message);
}
/**
 * Debug helper intended to be wrapped in assert(). When Tokenizer::$debug is
 * on it prints a header made of the line number padded with dashes to 80
 * characters, followed by the exported line contents.
 *
 * @param int $lineNumber Number of the line about to be tokenized
 * @param string $line The line's text
 * @return bool Always true so the enclosing assertion never fails
 */
private function debugLine(int $lineNumber, string $line): bool {
    if (!self::$debug) {
        return true;
    }

    // e.g. "12 ------------------------------------------------…"
    $header = str_pad("$lineNumber ", 80, '-');
    printf("%s\nLine: %s", $header, var_export($line, true));

    return true;
}
/**
 * Debug helper intended to be wrapped in assert(). When Tokenizer::$debug is
 * on it echoes the exported token list followed by a blank line.
 *
 * @param array $tokens The tokens to dump
 * @return bool Always true so the enclosing assertion never fails
 */
public function debugTokens(array $tokens): bool {
    if (!self::$debug) {
        return true;
    }

    $dump = var_export($tokens, true);
    echo 'Tokens: ' . $dump . "\n\n";

    return true;
}
} }
Loading…
Cancel
Save