|
@ -38,6 +38,7 @@ class Tokenizer { |
|
|
public function tokenize(): \Generator { |
|
|
public function tokenize(): \Generator { |
|
|
foreach ($this->data as $lineNumber => $line) { |
|
|
foreach ($this->data as $lineNumber => $line) { |
|
|
$this->debug = $lineNumber; |
|
|
$this->debug = $lineNumber; |
|
|
|
|
|
$this->debugCount = 0; |
|
|
$this->offset = 0; |
|
|
$this->offset = 0; |
|
|
|
|
|
|
|
|
$lineLength = strlen($line); |
|
|
$lineLength = strlen($line); |
|
@ -93,30 +94,32 @@ class Tokenizer { |
|
|
|
|
|
|
|
|
$currentRules = end($this->ruleStack)->patterns; |
|
|
$currentRules = end($this->ruleStack)->patterns; |
|
|
$currentRulesCount = count($currentRules); |
|
|
$currentRulesCount = count($currentRules); |
|
|
$nextMatch = null; |
|
|
$closestMatch = null; |
|
|
|
|
|
|
|
|
|
|
|
// Iterate through the rules to find matches for the line at the current offset. |
|
|
for ($i = 0; $i < $currentRulesCount; $i++) { |
|
|
for ($i = 0; $i < $currentRulesCount; $i++) { |
|
|
while (true) { |
|
|
while (true) { |
|
|
$rule = $currentRules[$i]; |
|
|
$rule = $currentRules[$i]; |
|
|
|
|
|
|
|
|
if ($this->debug === 6 && $this->debugCount === 12) { |
|
|
|
|
|
if ($rule instanceof Pattern) { |
|
|
|
|
|
echo "Match: {$rule->match}\n"; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// If the rule is a Pattern and matches the line at the offset then... |
|
|
// If the rule is a Pattern and matches the line at the offset then... |
|
|
if ($rule instanceof Pattern && preg_match($rule->match, $line, $match, PREG_OFFSET_CAPTURE, $this->offset)) { |
|
|
if ($rule instanceof Pattern && preg_match($rule->match, $line, $match, PREG_OFFSET_CAPTURE, $this->offset)) { |
|
|
$match = [ |
|
|
// If the match's offset is the same as the current offset then it is the |
|
|
'match' => $match, |
|
|
// closest match. There's no need to iterate anymore through the patterns. |
|
|
'pattern' => $rule |
|
|
if ($match[0][1] === $this->offset) { |
|
|
]; |
|
|
$closestMatch = [ |
|
|
|
|
|
'match' => $match, |
|
|
if ($match['match'][0][1] === $this->offset) { |
|
|
'pattern' => $rule |
|
|
$nextMatch = $match; |
|
|
]; |
|
|
break 2; |
|
|
break 2; |
|
|
} elseif ($match['match'][0][1] < $nextMatch['match'][0][1]) { |
|
|
} |
|
|
$nextMatch = $match; |
|
|
// Otherwise, if the closest match is currently null or the match's offset is |
|
|
|
|
|
// less than the closest match's offset then set the match as the closest match |
|
|
|
|
|
// and continue looking for a closer one. |
|
|
|
|
|
elseif ($closestMatch === null || $match[0][1] < $closestMatch['match'][0][1]) { |
|
|
|
|
|
$closestMatch = [ |
|
|
|
|
|
'match' => $match, |
|
|
|
|
|
'pattern' => $rule |
|
|
|
|
|
]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into |
|
|
// Otherwise, if the rule is a Reference then retrieve its patterns, splice into |
|
@ -136,15 +139,20 @@ class Tokenizer { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// If there were a match above... |
|
|
// If there were a match above... |
|
|
if ($nextMatch !== null) { |
|
|
if ($closestMatch !== null) { |
|
|
$match = $nextMatch['match']; |
|
|
$match = $closestMatch['match']; |
|
|
$pattern = $nextMatch['pattern']; |
|
|
$pattern = $closestMatch['pattern']; |
|
|
|
|
|
|
|
|
|
|
|
if ($this->debug === 7) { |
|
|
|
|
|
var_export($closestMatch); |
|
|
|
|
|
echo "\n"; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
// **¡TEMPORARY!** Haven't implemented begin and end line |
|
|
// **¡TEMPORARY!** Haven't implemented begin and end line |
|
|
// anchors, so let's toss patterns with them completely for now. |
|
|
// anchors, so let's toss patterns with them completely for now. |
|
|
if (preg_match('/\\\(?:A|G|Z)/', $rule->match)) { |
|
|
//if (preg_match('/\\\(?:A|G|Z)/', $rule->match)) { |
|
|
continue; |
|
|
// continue; |
|
|
} |
|
|
//} |
|
|
|
|
|
|
|
|
// If the subpattern begins after the offset then create a token from the bits |
|
|
// If the subpattern begins after the offset then create a token from the bits |
|
|
// of the line in-between the last token and the one(s) about to be created. |
|
|
// of the line in-between the last token and the one(s) about to be created. |
|
@ -257,6 +265,7 @@ class Tokenizer { |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// If the offset isn't at the end of the line then look for more matches. |
|
|
if ($this->offset !== $lineLength) { |
|
|
if ($this->offset !== $lineLength) { |
|
|
continue; |
|
|
continue; |
|
|
} |
|
|
} |
|
|