diff --git a/lib/Grammar.php b/lib/Grammar.php index bcaf029..acccf90 100644 --- a/lib/Grammar.php +++ b/lib/Grammar.php @@ -42,7 +42,11 @@ class Grammar { } - /** Imports an Atom JSON grammar into the Grammar object */ + /** + * Imports an Atom JSON grammar into the Grammar object + * + * @param string $filename - The JSON file to be imported + */ public function loadJSON(string $filename) { if (!is_file($filename)) { throw new Exception(Exception::JSON_INVALID_FILE, $filename); diff --git a/lib/GrammarRegistry.php b/lib/GrammarRegistry.php index ecee453..25f46e0 100644 --- a/lib/GrammarRegistry.php +++ b/lib/GrammarRegistry.php @@ -16,6 +16,12 @@ class GrammarRegistry { return true; } + /** + * Retrieves a grammar from the registry + * + * @param string $scopeName - The scope name (eg: text.html.php) of the grammar that is being requested + * @return dW\Lit\Grammar|false + */ public static function get(string $scopeName): Grammar|false { if (array_key_exists($scopeName, self::$storage)) { return self::$storage[$scopeName]; @@ -31,6 +37,13 @@ class GrammarRegistry { return false; } + /** + * Sets a grammar in the registry. + * + * @param string $scopeName - The scope name (eg: text.html.php) of the grammar that is being set + * @param dW\Lit\Grammar $grammar - The grammar to be put into the registry + * @return bool + */ public static function set(string $scopeName, Grammar $grammar): bool { try { self::$storage[$scopeName] = $grammar; diff --git a/lib/Highlight.php b/lib/Highlight.php index 85db259..dc557d5 100644 --- a/lib/Highlight.php +++ b/lib/Highlight.php @@ -13,10 +13,27 @@ use MensBeam\HTML\{ class Highlight { - public static function toDOM(string $data, string $scopeName, ?Document $document = null, string $encoding = 'windows-1252'): Element { + /** + * Highlights incoming string data and outputs an HTML DOM MensBeam\HTML\Element. + * + * @param string $data - The input data string. 
+ * @param string $scopeName - The scope name (eg: text.html.php) of the grammar that's needed to highlight the input data. + * @param ?MensBeam\HTML\Document [$document = null] - An existing MensBeam\HTML\Document to use as the owner document of the returned MensBeam\HTML\Element; if omitted one will be created instead. + * @param string [$encoding = 'windows-1252'] - If a document isn't provided an encoding may be provided for the new document; the HTML standard default windows-1252 is used if no encoding is provided. + * @return MensBeam\HTML\Element + */ + public static function toElement(string $data, string $scopeName, ?Document $document = null, string $encoding = 'windows-1252'): Element { return self::highlight($data, $scopeName, $document, $encoding); } + /** + * Highlights incoming string data and outputs an HTML string. + * + * @param string $data - The input data string. + * @param string $scopeName - The scope name (eg: text.html.php) of the grammar that's needed to highlight the input data. + * @param string [$encoding = 'windows-1252'] - Encoding for the input string data; the HTML standard default windows-1252 is used if no encoding is provided. 
+ * @return string + */ public static function toString(string $data, string $scopeName, string $encoding = 'windows-1252'): string { return (string)self::highlight($data, $scopeName, null, $encoding); } @@ -38,7 +55,7 @@ class Highlight { $pre = $document->createElement('pre'); $code = $document->createElement('code'); - $code->setAttribute('class', str_replace('.', ' ', $scopeName)); + $code->setAttribute('class', implode(' ', array_unique(explode('.', $scopeName)))); $pre->appendChild($code); $elementStack = [ $code ]; @@ -56,7 +73,7 @@ class Highlight { } $span = $document->createElement('span'); - $span->setAttribute('class', str_replace('.', ' ', $scope)); + $span->setAttribute('class', implode(' ', array_unique(explode('.', $scope)))); end($elementStack)->appendChild($span); $scopeStack[] = $scope; $elementStack[] = $span; diff --git a/lib/Tokenizer.php b/lib/Tokenizer.php index 513273f..4a72a6e 100644 --- a/lib/Tokenizer.php +++ b/lib/Tokenizer.php @@ -17,14 +17,22 @@ use dW\Lit\Scope\{ }; +/** Class for tokenizing input data */ class Tokenizer { + // Used for debugging; assertions (`ini_set('zend.assertions', '1')`) must be + // enabled to see debug output. public static bool $debug = false; - + // The input Data class. protected Data $data; + // The supplied Grammar used to highlight the input data. protected Grammar $grammar; + // The offset/position on the line the tokenizer is currently at. protected int $offset = 0; + // Flag used to tell the tokenizer an injection is on the rule stack. protected bool $activeInjection = false; + // The current line being tokenized. protected string $line = ''; + // The current line number of the input data being tokenized. protected int $lineNumber = 1; // Cache of rule lists which have had references spliced to keep from having to // repeatedly splice in the same reference. It needs to be in two arrays because @@ -32,7 +40,9 @@ class Tokenizer { // itself. 
protected array $ruleCacheIndexes = []; protected array $ruleCacheValues = []; + // The stack of rules protected array $ruleStack; + // The stack of scopes protected array $scopeStack; protected const SCOPE_RESOLVE_REGEX = '/\$(\d+)|\${(\d+):\/(downcase|upcase)}/S'; @@ -46,7 +56,7 @@ class Tokenizer { $this->scopeStack = [ $this->grammar->scopeName ]; } - + /** Receives lines from the Data object and yields an array of tokens */ public function tokenize(): \Generator { foreach ($this->data->get() as $lineNumber => $line) { $this->lineNumber = $lineNumber;