Browse Source

Added DOM creation

• Fixed bug where nonexistent grammars would cause tokenizer to fail.
• Added mensbeam/html as a dependency; removed the docopt/docopt and
ext-mbstring dependencies.
• Discovered a bug that occurs when injections are removed from the stack
during tokenizing; still investigating.
main
Dustin Wilson 3 years ago
parent
commit
f944ca9b9c
  1. 14
      composer.json
  2. 233
      composer.lock
  3. 2
      lib/Grammar/GrammarReference.php
  4. 2
      lib/GrammarRegistry.php
  5. 57
      lib/Highlight.php
  6. 8
      lib/Tokenizer.php

14
composer.json

@ -1,5 +1,5 @@
{ {
"name": "dw/highlighter", "name": "dw/lit",
"type": "library", "type": "library",
"description": "TextMate-like syntax highlighting in PHP", "description": "TextMate-like syntax highlighting in PHP",
"license": "MIT", "license": "MIT",
@ -10,18 +10,22 @@
"homepage": "https://dustinwilson.com/" "homepage": "https://dustinwilson.com/"
} }
], ],
"repositories": [
{
"type": "vcs",
"url": "https://code.mensbeam.com/MensBeam/HTML"
}
],
"require": { "require": {
"php": "^7.4 || ^8.0", "php": "^7.4 || ^8.0",
"ext-dom": "*", "ext-dom": "*",
"ext-intl": "*", "ext-intl": "*",
"ext-json": "*", "ext-json": "*",
"ext-mbstring": "*", "mensbeam/html": "dev-master"
"docopt/docopt": "^1.0"
}, },
"autoload": { "autoload": {
"psr-4": { "psr-4": {
"dW\\Lit\\": "lib/", "dW\\Lit\\": "lib/"
"dW\\Lit\\Scope\\": "lib/Scope/Matchers/"
} }
} }
} }

233
composer.lock

@ -4,33 +4,212 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "7f3c88aa5023ebb6ebad3e513973d927", "content-hash": "4adc23361d14315afd30b4dd2846557e",
"packages": [ "packages": [
{ {
"name": "docopt/docopt", "name": "mensbeam/html",
"version": "1.0.4", "version": "dev-master",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/docopt/docopt.php.git", "url": "https://code.mensbeam.com/MensBeam/HTML",
"reference": "bf3683a16e09fa1665e493eb4d5a29469e132a4f" "reference": "4a7511dd49e85ad1865a3252176047266d62b087"
},
"require": {
"ext-dom": "*",
"mensbeam/intl": ">=0.9.0",
"mensbeam/mimesniff": "^0.2.0",
"php": ">=7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"daux/daux.io": "^0.16.0",
"masterminds/html5": "^2.7"
},
"suggest": {
"ext-ctype": "Improved performance"
},
"default-branch": true,
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\HTML\\": [
"lib/",
"lib/DOM",
"lib/DOM/traits"
]
},
"classmap": [
"lib/Token.php"
],
"files": [
"lib/ctype.php"
]
},
"autoload-dev": {
"psr-4": {
"MensBeam\\HTML\\Test\\": "tests/lib/",
"MensBeam\\HTML\\TestCase\\": "tests/cases/"
}
},
"scripts": {
"post-install-cmd": [
"@composer bin all install"
],
"post-update-cmd": [
"@composer bin all update"
]
},
"license": [
"MIT"
],
"authors": [
{
"name": "Dustin Wilson",
"email": "dustin@dustinwilson.com",
"homepage": "https://dustinwilson.com/"
},
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "Parses modern HTML text into a PHP DOMDocument",
"time": "2021-04-26T21:07:13+00:00"
},
{
"name": "mensbeam/intl",
"version": "0.9.0",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/intl.git",
"reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/docopt/docopt.php/zipball/bf3683a16e09fa1665e493eb4d5a29469e132a4f", "url": "https://api.github.com/repos/mensbeam/intl/zipball/de037b182ce99aaa90ebc09b0ee0457ddf1d07bc",
"reference": "bf3683a16e09fa1665e493eb4d5a29469e132a4f", "reference": "de037b182ce99aaa90ebc09b0ee0457ddf1d07bc",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
"php": ">=5.3.0" "php": ">=7.1"
}, },
"require-dev": { "require-dev": {
"phpunit/phpunit": "4.1.*" "bamarni/composer-bin-plugin": "*",
"ext-intl": "*"
}, },
"type": "library", "type": "library",
"autoload": { "autoload": {
"classmap": [ "psr-4": {
"src/docopt.php" "MensBeam\\Intl\\": "lib/"
] }
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "A set of dependency-free basic internationalization tools",
"keywords": [
"WHATWG",
"charset",
"encoding",
"internationalization",
"intl",
"unicode",
"utf-8",
"utf8"
],
"support": {
"issues": "https://github.com/mensbeam/intl/issues",
"source": "https://github.com/mensbeam/intl/tree/0.9.0"
},
"time": "2021-03-25T19:08:04+00:00"
},
{
"name": "mensbeam/mimesniff",
"version": "0.2.1",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/mime.git",
"reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/mime/zipball/c19be2496ab1e27fbf9c3483c2a9faa2781796cd",
"reference": "c19be2496ab1e27fbf9c3483c2a9faa2781796cd",
"shasum": ""
},
"require": {
"php": ">=7.1",
"psr/http-message": "^1.0"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Mime\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "An implementation of the WHATWG MIME Sniffing specification",
"keywords": [
"WHATWG",
"mime",
"mimesniff"
],
"support": {
"issues": "https://github.com/mensbeam/mime/issues",
"source": "https://github.com/mensbeam/mime/tree/0.2.1"
},
"time": "2021-03-07T03:58:00+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
"license": [ "license": [
@ -38,37 +217,39 @@
], ],
"authors": [ "authors": [
{ {
"name": "Blake Williams", "name": "PHP-FIG",
"email": "code@shabbyrobe.org", "homepage": "http://www.php-fig.org/"
"homepage": "http://docopt.org/",
"role": "Developer"
} }
], ],
"description": "Port of Python's docopt for PHP >=5.3", "description": "Common interface for HTTP messages",
"homepage": "http://github.com/docopt/docopt.php", "homepage": "https://github.com/php-fig/http-message",
"keywords": [ "keywords": [
"cli", "http",
"docs" "http-message",
"psr",
"psr-7",
"request",
"response"
], ],
"support": { "support": {
"issues": "https://github.com/docopt/docopt.php/issues", "source": "https://github.com/php-fig/http-message/tree/master"
"source": "https://github.com/docopt/docopt.php/tree/1.0.4"
}, },
"time": "2019-12-03T02:48:46+00:00" "time": "2016-08-06T14:39:51+00:00"
} }
], ],
"packages-dev": [], "packages-dev": [],
"aliases": [], "aliases": [],
"minimum-stability": "stable", "minimum-stability": "stable",
"stability-flags": [], "stability-flags": {
"mensbeam/html": 20
},
"prefer-stable": false, "prefer-stable": false,
"prefer-lowest": false, "prefer-lowest": false,
"platform": { "platform": {
"php": "^7.4 || ^8.0", "php": "^7.4 || ^8.0",
"ext-dom": "*", "ext-dom": "*",
"ext-intl": "*", "ext-intl": "*",
"ext-json": "*", "ext-json": "*"
"ext-mbstring": "*"
}, },
"platform-dev": [], "platform-dev": [],
"plugin-api-version": "2.0.0" "plugin-api-version": "2.0.0"

2
lib/Grammar/GrammarReference.php

@ -24,7 +24,7 @@ class GrammarReference extends Reference {
} }
public function get(): Grammar { public function get(): Grammar|false {
return GrammarRegistry::get($this->_scopeName); return GrammarRegistry::get($this->_scopeName);
} }
} }

2
lib/GrammarRegistry.php

@ -16,7 +16,7 @@ class GrammarRegistry {
return true; return true;
} }
public static function get(string $scopeName): Grammar|bool { public static function get(string $scopeName): Grammar|false {
if (array_key_exists($scopeName, self::$storage)) { if (array_key_exists($scopeName, self::$storage)) {
return self::$storage[$scopeName]; return self::$storage[$scopeName];
} else { } else {

57
lib/Highlight.php

@ -6,15 +6,23 @@
declare(strict_types=1); declare(strict_types=1);
namespace dW\Lit; namespace dW\Lit;
use dW\Lit\Grammar\Exception; use dW\Lit\Grammar\Exception;
use MensBeam\HTML\{
Document,
Element
};
class Highlight { class Highlight {
public static function toDOM(string $data, string $scopeName) { public static function toDOM(string $data, string $scopeName, ?Document $document = null, string $encoding = 'windows-1252'): Element {
self::highlight($data, $scopeName); return self::highlight($data, $scopeName, $document, $encoding);
}
public static function toString(string $data, string $scopeName, string $encoding = 'windows-1252'): string {
return (string)self::highlight($data, $scopeName, null, $encoding);
} }
protected static function highlight(string $data, string $scopeName) { protected static function highlight(string $data, string $scopeName, ?Document $document = null, string $encoding = 'windows-1252'): Element {
$grammar = GrammarRegistry::get($scopeName); $grammar = GrammarRegistry::get($scopeName);
if ($grammar === false) { if ($grammar === false) {
throw new Exception(Exception::GRAMMAR_MISSING, $scopeName); throw new Exception(Exception::GRAMMAR_MISSING, $scopeName);
@ -23,7 +31,50 @@ class Highlight {
$tokenizer = new Tokenizer(new Data($data), $grammar); $tokenizer = new Tokenizer(new Data($data), $grammar);
$tokenList = $tokenizer->tokenize(); $tokenList = $tokenizer->tokenize();
if ($document === null) {
$document = new Document();
$document->encoding = $encoding;
}
$pre = $document->createElement('pre');
$code = $document->createElement('code');
$code->setAttribute('class', str_replace('.', ' ', $scopeName));
$pre->appendChild($code);
$elementStack = [ $code ];
$scopeStack = [ $scopeName ];
foreach ($tokenList as $lineNumber => $tokens) { foreach ($tokenList as $lineNumber => $tokens) {
continue;
foreach ($tokens as $token) {
$lastKey = count($token['scopes']) - 1;
foreach ($token['scopes'] as $key => $scope) {
$keyExists = array_key_exists($key, $scopeStack);
if (!$keyExists || $scopeStack[$key] !== $scope) {
if ($keyExists && $scopeStack[$key] !== $scope) {
$scopeStack = array_slice($scopeStack, 0, $key);
$elementStack = array_slice($elementStack, 0, $key);
}
$span = $document->createElement('span');
$span->setAttribute('class', str_replace('.', ' ', $scope));
end($elementStack)->appendChild($span);
$scopeStack[] = $scope;
$elementStack[] = $span;
}
if ($key === $lastKey) {
if (array_key_exists($key + 1, $scopeStack)) {
$scopeStack = array_slice($scopeStack, 0, $key + 1);
$elementStack = array_slice($elementStack, 0, $key + 1);
}
end($elementStack)->appendChild($document->createTextNode($token['text']));
}
}
}
} }
return $pre;
} }
} }

8
lib/Tokenizer.php

@ -110,7 +110,7 @@ class Tokenizer {
if ($selector->matches($this->scopeStack)) { if ($selector->matches($this->scopeStack)) {
$prefix = $selector->getPrefix($this->scopeStack); $prefix = $selector->getPrefix($this->scopeStack);
if ($prefix === Filter::PREFIX_LEFT || $prefix === Filter::PREFIX_BOTH) { if ($prefix === Filter::PREFIX_LEFT || $prefix === Filter::PREFIX_BOTH) {
$this->scopeStack[] = $injection; $this->ruleStack[] = $injection;
$this->activeInjection = $injection; $this->activeInjection = $injection;
break; break;
} }
@ -131,6 +131,12 @@ class Tokenizer {
while (true) { while (true) {
$rule = $currentRules[$i]; $rule = $currentRules[$i];
// Grammar references can return false if the grammar does not exist, so
// continue on if the current rule is false.
if ($rule === false) {
continue 2;
}
// If the rule is a Pattern // If the rule is a Pattern
if ($rule instanceof Pattern) { if ($rule instanceof Pattern) {
if (preg_match($rule->match, $this->line . ((!$this->data->lastLine) ? "\n" : ''), $match, PREG_OFFSET_CAPTURE, $this->offset) === 1) { if (preg_match($rule->match, $this->line . ((!$this->data->lastLine) ? "\n" : ''), $match, PREG_OFFSET_CAPTURE, $this->offset) === 1) {

Loading…
Cancel
Save