Browse Source

Decouple the tokenizer from the parser

This does not yet allow the tokenizer to be tested in isolation,
as the parse error emitter still assumes an instance of Parser is
available.
split-manual
J. King 6 years ago
parent
commit
33403f721f
  1. 4
      lib/Parser.php
  2. 368
      lib/Tokenizer.php

4
lib/Parser.php

@ -98,7 +98,6 @@ class Parser {
protected function __construct() {
$this->insertionMode = static::INITIAL_MODE;
$this->quirksMode = static::QUIRKS_MODE_OFF;
$this->tokenizer = new Tokenizer();
$this->stack = new Stack();
$this->activeFormattingElementsList = new ActiveFormattingElementsList();
}
@ -124,6 +123,9 @@ class Parser {
// work on basic latin characters. Used extensively when tokenizing.
setlocale(LC_CTYPE, 'en_US.UTF8');
// initialize the tokenizer
static::$instance->tokenizer = new Tokenizer(static::$instance->data, static::$instance->stack);
// Run the tokenizer. Tokenizer runs until after the EOF token is emitted.
do {
$token = static::$instance->tokenizer->createToken();

368
lib/Tokenizer.php

File diff suppressed because it is too large
Loading…
Cancel
Save