From c5631050ccf492681a495c57421cb64297915ae1 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 4 Sep 2018 08:28:25 -0500 Subject: [PATCH] Started Printer --- lib/ActiveFormattingElementsList.php | 2 +- lib/Exception.php | 46 +++++----- lib/Printer.php | 120 +++++++++++++++++++++++++++ lib/Stack.php | 6 +- 4 files changed, 144 insertions(+), 30 deletions(-) create mode 100644 lib/Printer.php diff --git a/lib/ActiveFormattingElementsList.php b/lib/ActiveFormattingElementsList.php index 1fd38e5..64ce509 100644 --- a/lib/ActiveFormattingElementsList.php +++ b/lib/ActiveFormattingElementsList.php @@ -24,7 +24,7 @@ class ActiveFormattingElementsList extends Stack { public function offsetSet($offset, $value) { if ($offset < 0 || $offset > count($this->_storage) - 1) { - throw new Exception(Exception::STACK_INVALID_INDEX); + throw new Exception(Exception::STACK_INVALID_INDEX, $offset); } if (is_null($offset)) { diff --git a/lib/Exception.php b/lib/Exception.php index c89a47a..f42e37f 100644 --- a/lib/Exception.php +++ b/lib/Exception.php @@ -7,47 +7,41 @@ class Exception extends \Exception { const UNKNOWN_ERROR = 10001; const INCORRECT_PARAMETERS_FOR_MESSAGE = 10002; - const PARSER_DOMDOCUMENT_EXPECTED = 10101; - const PARSER_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10102; - const PARSER_DOMNODE_EXPECTED = 10103; + const STACK_INVALID_INDEX = 10101; + const STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10102; - const STACK_INVALID_INDEX = 10201; - const STACK_DOMNODE_ONLY = 10202; - const STACK_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10203; + const DATA_NODATA = 10201; + const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10202; - const DATA_NODATA = 10301; - const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10302; + const DOM_DOMDOCUMENT_EXPECTED = 10301; + const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10302; - const DOM_DOMDOCUMENT_EXPECTED = 10401; - const DOM_DOMELEMENT_STRING_OR_CLOSURE_EXPECTED = 10402; + 
const TOKENIZER_INVALID_STATE = 10401; - const TOKENIZER_INVALID_STATE = 10501; + const TREEBUILDER_FORMELEMENT_EXPECTED = 10501; + const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10502; - const TREEBUILDER_FORMELEMENT_EXPECTED = 10601; - const TREEBUILDER_FRAGMENT_CONTEXT_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10602; + const PRINTER_DOMELEMENT_DOMDOCUMENT_DOMDOCUMENTFRAG_EXPECTED = 10601; protected static $messages = [10000 => 'Invalid error code', 10001 => 'Unknown error; escaping', 10002 => 'Incorrect number of parameters for Exception message; %s expected', - 10101 => 'DOMDocument expected; found %s', - 10102 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s', - 10103 => 'DOMNode expected; found %s', + 10101 => '%s is an invalid Stack index', + 10102 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected for fragment context; found %s', - 10201 => '%s is an invalid Stack index', - 10202 => 'Instances of DOMNode are the only types allowed in a Stack', - 10203 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected for fragment context; found %s', + 10201 => 'Data string expected; found %s', + 10202 => '%s is an invalid data consumption length; a value of 1 or above is expected', - 10301 => 'Data string expected; found %s', - 10302 => '%s is an invalid data consumption length; a value of 1 or above is expected', + 10301 => 'The first argument must be an instance of \DOMElement or null; found %s', + 10302 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s', - 10401 => 'The first argument must be an instance of \DOMElement or null; found %s', - 10402 => 'The first argument must either be an instance of \DOMElement, a string, or a closure; found %s', + 10401 => 'The Tokenizer has entered an invalid state', - 10501 => 'The Tokenizer has entered an invalid state', + 10501 => 'Form element expected, found %s', + 10502 => 'DOMElement, 
DOMDocument, or DOMDocumentFragment expected; found %s', - 10601 => 'Form element expected, found %s', - 10602 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s']; + 10601 => 'DOMElement, DOMDocument, or DOMDocumentFragment expected; found %s',]; public function __construct(int $code, ...$args) { if (!isset(static::$messages[$code])) { diff --git a/lib/Printer.php b/lib/Printer.php new file mode 100644 index 0000000..4d8dc38 --- /dev/null +++ b/lib/Printer.php @@ -0,0 +1,120 @@ +childNodes->length; $i++) { + # 1. Let current node be the child node being processed. + $currentNode = $node->childNodes->item($i); + + # 2. Append the appropriate string from the following list to s: + ## If current node is an Element + ### If current node is an element in the HTML namespace, the MathML namespace, + ### or the SVG namespace, then let tagname be current node’s local name. + ### Otherwise, let tagname be current node’s qualified name. + if ($currentNode instanceof DOMElement && (is_null($currentNode->namespaceURI) || $currentNode->namespaceURI === Parser::MATHML_NAMESPACE || $currentNode->namespaceURI === Parser::SVG_NAMESPACE)) { + $tagName = $currentNode->localName; + } else { + $tagName = $currentNode->nodeName; + } + + ### Append a U+003C LESS-THAN SIGN character (<), followed by tagname. + $s .= "<$tagName"; + + ### For each attribute that the element has, append a U+0020 SPACE character, + ### the attribute’s serialized name as described below, a U+003D EQUALS SIGN + ### character (=), a U+0022 QUOTATION MARK character ("), the attribute’s value, + ### escaped as described below in attribute mode, and a second U+0022 QUOTATION + ### MARK character ("). 
+ for ($j = 0; $j < $currentNode->attributes->length; $j++) { + $attr = $currentNode->attributes->item($j); + + # An attribute’s serialized name for the purposes of the previous paragraph + # must be determined as follows: + switch ($attr->namespaceURI) { + # If the attribute has no namespace + case null: + # The attribute’s serialized name is the attribute’s local name. + $name = $attr->localName; + break; + # If the attribute is in the XML namespace + case Parser::XML_NAMESPACE: + # The attribute’s serialized name is the string "xml:" followed by the + # attribute’s local name. + $name = 'xml:' . $attr->localName; + break; + # If the attribute is in the XMLNS namespace... + case Parser::XMLNS_NAMESPACE: + # ...and the attribute’s local name is xmlns + if ($attr->localName === 'xmlns') { + # The attribute’s serialized name is the string "xmlns". + $name = 'xmlns'; + } + # ...and the attribute’s local name is not xmlns + else { + # The attribute’s serialized name is the string "xmlns:" followed by the + # attribute’s local name. + $name = 'xmlns:' . $attr->localName; + } + break; + # If the attribute is in the XLink namespace + case Parser::XLINK_NAMESPACE: + # The attribute’s serialized name is the string "xlink:" followed by the + # attribute’s local name. + $name = 'xlink:' . $attr->localName; + break; + # If the attribute is in some other namespace + default: + # The attribute’s serialized name is the attribute’s qualified name. + $name = $attr->name; + } + + $value = static::escapeString($attr->value, true); + + $s .= " $name=\"$value\""; + } + + ### While the exact order of attributes is UA-defined, and may depend on factors + ### such as the order that the attributes were given in the original markup, the + ### sort order must be stable, such that consecutive invocations of this + ### algorithm serialize an element’s attributes in the same order. + // Okay. + + ### Append a U+003E GREATER-THAN SIGN character (>). 
+            $s .= '>';
+        }
+    }
+
+    protected static function escapeString(string $string, bool $attribute = false): string {
+        # Escapes $string for serialized output; pass $attribute = true for attribute values.
+        # Escaping a string (for the purposes of the algorithm above) consists of running the following steps:
+        ## 1. Replace any occurrence of the "&" character by the string "&amp;".
+        ## 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
+        ##    string "&nbsp;".
+        $string = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string); // "\u{A0}" is U+00A0 as UTF-8; chr(0xA0) is a lone byte
+        ## 3. If the algorithm was invoked in the attribute mode, replace any
+        ##    occurrences of the """ character by the string "&quot;".
+        ## 4. If the algorithm was not invoked in the attribute mode, replace any
+        ##    occurrences of the "<" character by the string "&lt;", and any
+        ##    occurrences of the ">" character by the string "&gt;".
+        $search = $attribute ? ['"'] : ['<', '>']; // steps 3 and 4 are mutually exclusive
+        $replace = $attribute ? ['&quot;'] : ['&lt;', '&gt;'];
+        $string = str_replace($search, $replace, $string);
+
+        return $string;
+    }
+}
\ No newline at end of file
diff --git a/lib/Stack.php b/lib/Stack.php
index 0443956..cd805d3 100644
--- a/lib/Stack.php
+++ b/lib/Stack.php
@@ -9,7 +9,7 @@ class Stack implements \ArrayAccess {
 
     public function offsetSet($offset, $value) {
         if ($offset < 0) {
-            throw new Exception(Exception::STACK_INVALID_INDEX);
+            throw new Exception(Exception::STACK_INVALID_INDEX, $offset);
         }
 
         if (is_null($offset)) {
@@ -25,7 +25,7 @@ class Stack implements \ArrayAccess {
 
     public function offsetUnset($offset) {
         if ($offset < 0 || $offset > count($this->_storage) - 1) {
-            throw new Exception(Exception::STACK_INVALID_INDEX);
+            throw new Exception(Exception::STACK_INVALID_INDEX, $offset);
         }
 
         unset($this->_storage[$offset]);
@@ -35,7 +35,7 @@ class Stack implements \ArrayAccess {
 
     public function offsetGet($offset) {
         if ($offset < 0 || $offset > count($this->_storage) - 1) {
-            throw new Exception(Exception::STACK_INVALID_INDEX);
+            throw new Exception(Exception::STACK_INVALID_INDEX, $offset);
         }
 
         return $this->_storage[$offset];