You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
10021 lines
371 KiB
10021 lines
371 KiB
<?php
|
|
|
|
# Deviations:
|
|
# 1. Because of how this is used there's no need for navigation of a browsing
|
|
# context or scripting.
|
|
# 2. Only UTF-8 is supported. This means all methods the parser is supposed to
|
|
# use to convert and detect encodings are ignored except converting to the
|
|
# UTF-8 encoding while processing the input stream.
|
|
# 3. NULL characters are stripped from the document instead of being
|
|
# categorically converted to replacement characters. NULL characters are
|
|
# invalid anyway, and NULL characters can't be injected into the document
|
|
# through scripting in this implementation so stripping them makes more sense.
|
|
# 4. Comments before the DOCTYPE will be stripped from the document. It's a
|
|
# limitation of PHP5's DOM which is what is used in tree building in this
|
|
# implementation.
|
|
# 5. PHP's DOM cannot accept an empty DOCTYPE qualified name, so when an
|
|
# empty one is encountered it is replaced with 'html'.
|
|
# 6. The DOM serializer (HTML5::serialize()) in this class is different from
|
|
# what's specified in the spec (§13.3). While it's based upon it this
|
|
# implementation is capable of accurately printing foreign content.
|
|
|
|
class HTML5
|
|
{
|
|
# Debug flag.
|
|
public static $debug=0;
|
|
|
|
# DOMDocument.
|
|
protected static $DOM=null;
|
|
|
|
# DOMDocumentFragment used when parsing fragments.
|
|
protected static $DOMFragment=null;
|
|
|
|
# List of active formatting elements used by the tree builder.
|
|
protected static $active=array();
|
|
|
|
# Size of static::$active.
|
|
protected static $activeSize=0;
|
|
|
|
# Context element used when parsing fragments.
|
|
protected static $context=null;
|
|
|
|
# Input data that's being parsed.
|
|
protected static $data=null;
|
|
|
|
# Length of the input data.
|
|
protected static $EOF=0;
|
|
|
|
# Temporary buffer used by some states.
|
|
protected static $buffer='';
|
|
|
|
# Temporary attribute name buffer used by some states. Not in spec.
|
|
# Used to store the attribute name as it's being built. That way the
|
|
# attributes can be stored in a more efficient manner.
|
|
protected static $attributenamebuffer='';
|
|
|
|
protected static $parseErrors=array('tag name expected' => 'Tag name expected; found %s',
|
|
'tag end expected' => 'Tag end expected; found %s',
|
|
'attribute name expected' => 'Attribute name expected; found %s',
|
|
'attribute exists' => 'Attribute %s already exists',
|
|
'attribute value tag end expected' => 'Attribute value or tag end expected; found %s',
|
|
'attribute value expected' => 'Attribute value expected; found %s',
|
|
'unquoted attribute value expected' => 'Unquoted attribute value expected; found %s',
|
|
'attribute name tag end expected' => 'Attribute name or tag end expected; found %s',
|
|
'doctype dashes cdata expected' => 'DOCTYPE, dashes, or CDATA expected; found %s',
|
|
'comment expected' => 'Comment data expected; found %s',
|
|
'comment end expected' => 'Comment end expected; found %s',
|
|
'doctype name expected' => 'DOCTYPE name expected; found %s',
|
|
'doctype keyword tag end expected' => 'DOCTYPE keyword or tag end expected; found %s',
|
|
'doctype public identifier expected' => 'DOCTYPE public identifier expected; found %s',
|
|
'double-quoted doctype public identifier expected' => 'Double-quoted DOCTYPE public identifier expected; found %s',
|
|
'single-quoted doctype public identifier expected' => 'Single-quoted DOCTYPE public identifier expected; found %s',
|
|
'doctype system identifier expected' => 'DOCTYPE system identifier expected; found %s',
|
|
'double-quoted doctype system identifier expected' => 'Double-quoted DOCTYPE system identifier expected; found %s',
|
|
'single-quoted doctype system identifier expected' => 'Single-quoted DOCTYPE system identifier expected; found %s',
|
|
'unexpected eof tag name' => 'Unexpected end of file; tag name expected',
|
|
'unexpected eof escaped script data' => 'Unexpected end of file; escaped script data expected',
|
|
'unexpected eof double escaped script data' => 'Unexpected end of file; double escaped script data expected',
|
|
'unexpected eof attribute name' => 'Unexpected end of file; attribute name expected',
|
|
'unexpected eof attribute value' => 'Unexpected end of file; attribute value expected',
|
|
'unexpected eof unquoted attribute value' => 'Unexpected end of file; unquoted attribute value expected',
|
|
'unexpected eof attribute value tag end' => 'Unexpected end of file; attribute value or tag end expected',
|
|
'unexpected eof attribute name tag end' => 'Unexpected end of file; attribute name or tag end expected',
|
|
'unexpected eof comment' => 'Unexpected end of file; comment expected',
|
|
'unexpected eof comment end' => 'Unexpected end of file; comment end expected',
|
|
'unexpected eof doctype name' => 'Unexpected end of file; DOCTYPE name expected',
|
|
'unexpected eof doctype keyword end tag' => 'Unexpected end of file; DOCTYPE keyword or end tag expected',
|
|
'unexpected eof doctype public identifier' => 'Unexpected end of file; DOCTYPE public identifier expected',
|
|
'unexpected eof doctype system identifier' => 'Unexpected end of file; DOCTYPE system identifier expected',
|
|
'control or noncharacters' => 'Control or permanently undefined unicode character in input stream',
|
|
'numeric entity expected' => 'Numeric entity expected; found %s',
|
|
'semicolon terminator expected' => 'Semicolon entity terminator expected; found %s',
|
|
'invalid numeric entity' => 'Invalid numeric entity; replacing with an appropriate entity',
|
|
'illegal codepoint' => 'Illegal codepoint for a numeric entity; replacing with a U+FFFD replacement character',
|
|
'invalid named entity' => 'Invalid named entity',
|
|
'doctype expected character' => 'DOCTYPE expected; found %s',
|
|
'doctype expected start tag' => 'DOCTYPE expected; found %s start tag',
|
|
'doctype expected end tag' => 'DOCTYPE expected; found %s end tag',
|
|
'unexpected doctype' => 'Unexpected DOCTYPE; the current open element is %s',
|
|
'unexpected start tag' => 'Unexpected %s start tag; the current open element is %s',
|
|
'attributes in end tag' => 'Attributes found in %s end tag',
|
|
'self-closing end tag' => '%s end tag cannot be self-closing',
|
|
'unexpected end tag' => 'Unexpected %s end tag; the current open element is %s',
|
|
'invalid doctype' => 'Invalid DOCTYPE',
|
|
'unexpected character' => 'Unexpected %s; the current open element is %s',
|
|
'unexpected eof' => 'Unexpected end of file; the current open element is %s',
|
|
'invalid start tag' => '%s start tag is invalid; replaced with %s element',
|
|
'invalid end tag' => '%s end tag is invalid; replaced with %s element',
|
|
'invalid foreign attribute' => '%s element\'s %s attribute is invalid; should be %s');
|
|
|
|
protected static $fatalErrors=array('domdocument expected' => 'DOMDocument expected',
|
|
'string expected' => 'String expected; found %s',
|
|
'callback expected' => 'Callback expected, found %s',
|
|
'invalid consume length' => 'Consume length must be greater than 0',
|
|
'invalid peek length' => 'Peek length must be greater than 0',
|
|
'method expected' => 'Class method name expected',
|
|
'string array closure expected' => 'String, array, or closure expected; found %s',
|
|
'closure expected' => 'Closure expected; found instance of %s',
|
|
'domnode expected' => 'Instance of DOMNode expected; found %s',
|
|
'domelement document frag expected' => 'Instance of DOMElement, DOMDocument, or DOMDocumentFrag expected; found %s',
|
|
'invalid option value' => 'Invalid value for option %s; %s expected; found %s',
|
|
'invalid option value type' => 'Invalid value type for option %s; %s expected; found %s',
|
|
'invalid parse error' => '%s is an invalid parse error',
|
|
'invalid fatal error' => '%s is an invalid fatal error');
|
|
|
|
//const PARSE_ERROR_TAG_NAME_EXPECTED = 0;
|
|
//const PARSE_ERROR_TAG_END_EXPECTED = 1;
|
|
//const PARSE_ERROR_ATTRIBUTE_NAME_EXPECTED = 2;
|
|
//const PARSE_ERROR_ATTRIBUTE_EXISTS = 3;
|
|
|
|
# Element table for foreign attribute adjustments.
|
|
protected static $foreignAttributes=array('xlink:actuate' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:arcrole' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:href' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:role' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:show' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:title' => 'http://www.w3.org/1999/xlink',
|
|
'xlink:type' => 'http://www.w3.org/1999/xlink',
|
|
'xml:base' => 'http://www.w3.org/XML/1998/namespace',
|
|
'xml:lang' => 'http://www.w3.org/XML/1998/namespace',
|
|
'xml:space' => 'http://www.w3.org/XML/1998/namespace',
|
|
'xmlns' => 'http://www.w3.org/2000/xmlns/',
|
|
'xmlns:xlink' => 'http://www.w3.org/2000/xmlns/');
|
|
|
|
# Used by the tree builder to house the parsed form element.
|
|
protected static $form=null;
|
|
|
|
# Used by the tree builder to determine if foster parenting is needed instead
|
|
# of inserting elements.
|
|
protected static $fosterParenting=false;
|
|
|
|
# Used by the tree builder to determine if the current algorithm is a fragment.
|
|
protected static $fragment=false;
|
|
|
|
# Flag used by the tree builder to determine if framesets are okay to use.
|
|
protected static $framesetOk=true;
|
|
|
|
# Used by the tree builder to house the parsed head element.
|
|
protected static $head=null;
|
|
|
|
# Used by extended classes to see if the emitted token was HTML or foreign
|
|
# content.
|
|
protected static $htmlContent=false;
|
|
|
|
# Elements that have implied end tags.
|
|
protected static $impliedElements=array('dd','dt','li','option','optgroup','p','rp','rt');
|
|
|
|
# Used by the tree building to house pending table character tokens.
|
|
protected static $pendingTableCharacterTokens=array();
|
|
|
|
# Used when parsing is completed to fix the PHP id attribute bug. Allows
|
|
# DOMDocument->getElementById() to work on id attributes.
|
|
protected static $relaxNG=<<<'NOWDOC'
|
|
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
|
<start>
|
|
<element>
|
|
<anyName/>
|
|
<ref name="anythingID"/>
|
|
</element>
|
|
</start>
|
|
<define name="anythingID">
|
|
<zeroOrMore>
|
|
<choice>
|
|
<element>
|
|
<anyName/>
|
|
<ref name="anythingID"/>
|
|
</element>
|
|
<attribute name="id">
|
|
<data type="ID"/>
|
|
</attribute>
|
|
<zeroOrMore>
|
|
<attribute><anyName/></attribute>
|
|
</zeroOrMore>
|
|
<text/>
|
|
</choice>
|
|
</zeroOrMore>
|
|
</define>
|
|
</grammar>
|
|
NOWDOC;
|
|
|
|
# Element table for SVG element attribute adjustments.
|
|
protected static $svgAttributes=array('attributename'=>'attributeName',
|
|
'attributetype'=>'attributeType',
|
|
'basefrequency'=>'baseFrequency',
|
|
'baseprofile'=>'baseProfile',
|
|
'calcmode'=>'calcMode',
|
|
'clippathunits'=>'clipPathUnits',
|
|
'contentscripttype'=>'contentScriptType',
|
|
'contentstyletype'=>'contentStyleType',
|
|
'diffuseconstant'=>'diffuseConstant',
|
|
'edgemode'=>'edgeMode',
|
|
'externalresourcesrequired'=>'externalResourcesRequired',
|
|
'filterres'=>'filterRes',
|
|
'filterunits'=>'filterUnits',
|
|
'glyphref'=>'glyphRef',
|
|
'gradienttransform'=>'gradientTransform',
|
|
'gradientunits'=>'gradientUnits',
|
|
'kernelmatrix'=>'kernelMatrix',
|
|
'kernelunitlength'=>'kernelUnitLength',
|
|
'keypoints'=>'keyPoints',
|
|
'keysplines'=>'keySplines',
|
|
'keytimes'=>'keyTimes',
|
|
'lengthadjust'=>'lengthAdjust',
|
|
'limitingconeangle'=>'limitingConeAngle',
|
|
'markerheight'=>'markerHeight',
|
|
'markerunits'=>'markerUnits',
|
|
'markerwidth'=>'markerWidth',
|
|
'maskcontentunits'=>'maskContentUnits',
|
|
'maskunits'=>'maskUnits',
|
|
'numoctaves'=>'numOctaves',
|
|
'pathlength'=>'pathLength',
|
|
'patterncontentunits'=>'patternContentUnits',
|
|
'patterntransform'=>'patternTransform',
|
|
'patternunits'=>'patternUnits',
|
|
'pointsatx'=>'pointsAtX',
|
|
'pointsaty'=>'pointsAtY',
|
|
'pointsatz'=>'pointsAtZ',
|
|
'preservealpha'=>'preserveAlpha',
|
|
'preserveaspectratio'=>'preserveAspectRatio',
|
|
'primitiveunits'=>'primitiveUnits',
|
|
'refx'=>'refX',
|
|
'refy'=>'refY',
|
|
'repeatcount'=>'repeatCount',
|
|
'repeatdur'=>'repeatDur',
|
|
'requiredextensions'=>'requiredExtensions',
|
|
'requiredfeatures'=>'requiredFeatures',
|
|
'specularconstant'=>'specularConstant',
|
|
'specularexponent'=>'specularExponent',
|
|
'spreadmethod'=>'spreadMethod',
|
|
'startoffset'=>'startOffset',
|
|
'stddeviation'=>'stdDeviation',
|
|
'stitchtiles'=>'stitchTiles',
|
|
'surfacescale'=>'surfaceScale',
|
|
'systemlanguage'=>'systemLanguage',
|
|
'tablevalues'=>'tableValues',
|
|
'targetx'=>'targetX',
|
|
'targety'=>'targetY',
|
|
'textlength'=>'textLength',
|
|
'viewbox'=>'viewBox',
|
|
'viewtarget'=>'viewTarget',
|
|
'xchannelselector'=>'xChannelSelector',
|
|
'ychannelselector'=>'yChannelSelector',
|
|
'zoomandpan'=>'zoomAndPan');
|
|
|
|
protected static $svgElements=array('altglyph'=>'altGlyph',
|
|
'altglyphdef'=>'altGlyphDef',
|
|
'altglyphitem'=>'altGlyphItem',
|
|
'animatecolor'=>'animateColor',
|
|
'animatemotion'=>'animateMotion',
|
|
'animatetransform'=>'animateTransform',
|
|
'clippath'=>'clipPath',
|
|
'feblend'=>'feBlend',
|
|
'fecolormatrix'=>'feColorMatrix',
|
|
'fecomponenttransfer'=>'feComponentTransfer',
|
|
'fecomposite'=>'feComposite',
|
|
'feconvolvematrix'=>'feConvolveMatrix',
|
|
'fediffuselighting'=>'feDiffuseLighting',
|
|
'fedisplacementmap'=>'feDisplacementMap',
|
|
'fedistantlight'=>'feDistantLight',
|
|
'feflood'=>'feFlood',
|
|
'fefunca'=>'feFuncA',
|
|
'fefuncb'=>'feFuncB',
|
|
'fefuncg'=>'feFuncG',
|
|
'fefuncr'=>'feFuncR',
|
|
'fegaussianblur'=>'feGaussianBlur',
|
|
'feimage'=>'feImage',
|
|
'femerge'=>'feMerge',
|
|
'femergenode'=>'feMergeNode',
|
|
'femorphology'=>'feMorphology',
|
|
'feoffset'=>'feOffset',
|
|
'fepointlight'=>'fePointLight',
|
|
'fespecularlighting'=>'feSpecularLighting',
|
|
'fespotlight'=>'feSpotLight',
|
|
'fetile'=>'feTile',
|
|
'feturbulence'=>'feTurbulence',
|
|
'foreignobject'=>'foreignObject',
|
|
'glyphref'=>'glyphRef',
|
|
'lineargradient'=>'linearGradient',
|
|
'radialgradient'=>'radialGradient',
|
|
'textpath'=>'textPath');
|
|
|
|
# Used in the "in head" insertion mode. Done this way to make extending the class easier.
|
|
protected static $headElements=array('base','basefont','bgsound','menuitem','link');
|
|
protected static $rawtextHeadElements=array('noframes','style');
|
|
protected static $rcdataHeadElements=array('title');
|
|
|
|
# Used when pretty printing in HTML5::serialize().
|
|
protected static $blockElements=array('address','article',
|
|
'aside','blockquote',
|
|
'body','canvas',
|
|
'dd','dir','div','dl',
|
|
'dt','fieldset',
|
|
'figcaption','figure',
|
|
'footer','form',
|
|
'frame','frameset',
|
|
'h1','h2','h3','h4',
|
|
'h5','h6','head',
|
|
'header','hgroup',
|
|
'hr','html','li','main',
|
|
'menu','nav','ol',
|
|
'option','output','p',
|
|
'pre','section','select',
|
|
'source','table','tbody','td',
|
|
'th','thead','tr','ul',
|
|
'#document');
|
|
|
|
# Used when pretty printing in HTML5::serialize().
|
|
protected static $spacedBlockElements=array('address','article',
|
|
'aside','blockquote',
|
|
'body','canvas','dir',
|
|
'div','dl','fieldset',
|
|
'figure','footer',
|
|
'form','frame',
|
|
'frameset','h1','h2',
|
|
'h3','h4','h5','h6',
|
|
'head','header',
|
|
'hgroup','hr','html','main',
|
|
'menu','nav','ol','p',
|
|
'pre','section','source','table','ul');
|
|
|
|
# Used when pretty printing in HTML5::serialize();
|
|
protected static $selfClosingElements=array('area','base','basefont','bgsound',
|
|
'br','col','command','embed','frame',
|
|
'hr','img','input','keygen','link',
|
|
'meta','param','source','track','wbr');
|
|
|
|
# Used when pretty printing in HTML5::serialize();
|
|
protected static $preElements=array('pre','title');
|
|
|
|
# Used when pretty printing in HTML5::serialize();
|
|
protected static $scriptElements=array('script','style');
|
|
|
|
# Used when pretty printing in HTML5::serialize();
|
|
protected static $headBlockElements=array('script','style');
|
|
|
|
# Controls the primary operation of the tree builder.
|
|
protected static $mode='initial';
|
|
|
|
protected static $entities=array('AElig'=>'Æ',
|
|
'AElig;'=>'Æ',
|
|
'AMP'=>'&',
|
|
'AMP;'=>'&',
|
|
'Aacute'=>'Á',
|
|
'Aacute;'=>'Á',
|
|
'Abreve;'=>'Ă',
|
|
'Acirc'=>'Â',
|
|
'Acirc;'=>'Â',
|
|
'Acy;'=>'А',
|
|
'Afr;'=>'프',
|
|
'Agrave'=>'À',
|
|
'Agrave;'=>'À',
|
|
'Alpha;'=>'Α',
|
|
'Amacr;'=>'Ā',
|
|
'And;'=>'⩓',
|
|
'Aogon;'=>'Ą',
|
|
'Aopf;'=>'픸',
|
|
'ApplyFunction;'=>'',
|
|
'Aring'=>'Å',
|
|
'Aring;'=>'Å',
|
|
'Ascr;'=>'풜',
|
|
'Assign;'=>'≔',
|
|
'Atilde'=>'Ã',
|
|
'Atilde;'=>'Ã',
|
|
'Auml'=>'Ä',
|
|
'Auml;'=>'Ä',
|
|
'Backslash;'=>'∖',
|
|
'Barv;'=>'⫧',
|
|
'Barwed;'=>'⌆',
|
|
'Bcy;'=>'Б',
|
|
'Because;'=>'∵',
|
|
'Bernoullis;'=>'ℬ',
|
|
'Beta;'=>'Β',
|
|
'Bfr;'=>'픅',
|
|
'Bopf;'=>'픹',
|
|
'Breve;'=>'˘',
|
|
'Bscr;'=>'ℬ',
|
|
'Bumpeq;'=>'≎',
|
|
'CHcy;'=>'Ч',
|
|
'COPY'=>'©',
|
|
'COPY;'=>'©',
|
|
'Cacute;'=>'Ć',
|
|
'Cap;'=>'⋒',
|
|
'CapitalDifferentialD;'=>'ⅅ',
|
|
'Cayleys;'=>'ℭ',
|
|
'Ccaron;'=>'Č',
|
|
'Ccedil'=>'Ç',
|
|
'Ccedil;'=>'Ç',
|
|
'Ccirc;'=>'Ĉ',
|
|
'Cconint;'=>'∰',
|
|
'Cdot;'=>'Ċ',
|
|
'Cedilla;'=>'¸',
|
|
'CenterDot;'=>'·',
|
|
'Cfr;'=>'ℭ',
|
|
'Chi;'=>'Χ',
|
|
'CircleDot;'=>'⊙',
|
|
'CircleMinus;'=>'⊖',
|
|
'CirclePlus;'=>'⊕',
|
|
'CircleTimes;'=>'⊗',
|
|
'ClockwiseContourIntegral;'=>'∲',
|
|
'CloseCurlyDoubleQuote;'=>'”',
|
|
'CloseCurlyQuote;'=>'’',
|
|
'Colon;'=>'∷',
|
|
'Colone;'=>'⩴',
|
|
'Congruent;'=>'≡',
|
|
'Conint;'=>'∯',
|
|
'ContourIntegral;'=>'∮',
|
|
'Copf;'=>'ℂ',
|
|
'Coproduct;'=>'∐',
|
|
'CounterClockwiseContourIntegral;'=>'∳',
|
|
'Cross;'=>'⨯',
|
|
'Cscr;'=>'풞',
|
|
'Cup;'=>'⋓',
|
|
'CupCap;'=>'≍',
|
|
'DD;'=>'ⅅ',
|
|
'DDotrahd;'=>'⤑',
|
|
'DJcy;'=>'Ђ',
|
|
'DScy;'=>'Ѕ',
|
|
'DZcy;'=>'Џ',
|
|
'Dagger;'=>'‡',
|
|
'Darr;'=>'↡',
|
|
'Dashv;'=>'⫤',
|
|
'Dcaron;'=>'Ď',
|
|
'Dcy;'=>'Д',
|
|
'Del;'=>'∇',
|
|
'Delta;'=>'Δ',
|
|
'Dfr;'=>'픇',
|
|
'DiacriticalAcute;'=>'´',
|
|
'DiacriticalDot;'=>'˙',
|
|
'DiacriticalDoubleAcute;'=>'˝',
|
|
'DiacriticalGrave;'=>'`',
|
|
'DiacriticalTilde;'=>'˜',
|
|
'Diamond;'=>'⋄',
|
|
'DifferentialD;'=>'ⅆ',
|
|
'Dopf;'=>'픻',
|
|
'Dot;'=>'¨',
|
|
'DotDot;'=>'⃜',
|
|
'DotEqual;'=>'≐',
|
|
'DoubleContourIntegral;'=>'∯',
|
|
'DoubleDot;'=>'¨',
|
|
'DoubleDownArrow;'=>'⇓',
|
|
'DoubleLeftArrow;'=>'⇐',
|
|
'DoubleLeftRightArrow;'=>'⇔',
|
|
'DoubleLeftTee;'=>'⫤',
|
|
'DoubleLongLeftArrow;'=>'⟸',
|
|
'DoubleLongLeftRightArrow;'=>'⟺',
|
|
'DoubleLongRightArrow;'=>'⟹',
|
|
'DoubleRightArrow;'=>'⇒',
|
|
'DoubleRightTee;'=>'⊨',
|
|
'DoubleUpArrow;'=>'⇑',
|
|
'DoubleUpDownArrow;'=>'⇕',
|
|
'DoubleVerticalBar;'=>'∥',
|
|
'DownArrow;'=>'↓',
|
|
'DownArrowBar;'=>'⤓',
|
|
'DownArrowUpArrow;'=>'⇵',
|
|
'DownBreve;'=>'̑',
|
|
'DownLeftRightVector;'=>'⥐',
|
|
'DownLeftTeeVector;'=>'⥞',
|
|
'DownLeftVector;'=>'↽',
|
|
'DownLeftVectorBar;'=>'⥖',
|
|
'DownRightTeeVector;'=>'⥟',
|
|
'DownRightVector;'=>'⇁',
|
|
'DownRightVectorBar;'=>'⥗',
|
|
'DownTee;'=>'⊤',
|
|
'DownTeeArrow;'=>'↧',
|
|
'Downarrow;'=>'⇓',
|
|
'Dscr;'=>'풟',
|
|
'Dstrok;'=>'Đ',
|
|
'ENG;'=>'Ŋ',
|
|
'ETH'=>'Ð',
|
|
'ETH;'=>'Ð',
|
|
'Eacute'=>'É',
|
|
'Eacute;'=>'É',
|
|
'Ecaron;'=>'Ě',
|
|
'Ecirc'=>'Ê',
|
|
'Ecirc;'=>'Ê',
|
|
'Ecy;'=>'Э',
|
|
'Edot;'=>'Ė',
|
|
'Efr;'=>'픈',
|
|
'Egrave'=>'È',
|
|
'Egrave;'=>'È',
|
|
'Element;'=>'∈',
|
|
'Emacr;'=>'Ē',
|
|
'EmptySmallSquare;'=>'◻',
|
|
'EmptyVerySmallSquare;'=>'▫',
|
|
'Eogon;'=>'Ę',
|
|
'Eopf;'=>'피',
|
|
'Epsilon;'=>'Ε',
|
|
'Equal;'=>'⩵',
|
|
'EqualTilde;'=>'≂',
|
|
'Equilibrium;'=>'⇌',
|
|
'Escr;'=>'ℰ',
|
|
'Esim;'=>'⩳',
|
|
'Eta;'=>'Η',
|
|
'Euml'=>'Ë',
|
|
'Euml;'=>'Ë',
|
|
'Exists;'=>'∃',
|
|
'ExponentialE;'=>'ⅇ',
|
|
'Fcy;'=>'Ф',
|
|
'Ffr;'=>'픉',
|
|
'FilledSmallSquare;'=>'◼',
|
|
'FilledVerySmallSquare;'=>'▪',
|
|
'Fopf;'=>'픽',
|
|
'ForAll;'=>'∀',
|
|
'Fouriertrf;'=>'ℱ',
|
|
'Fscr;'=>'ℱ',
|
|
'GJcy;'=>'Ѓ',
|
|
'GT'=>'>',
|
|
'GT;'=>'>',
|
|
'Gamma;'=>'Γ',
|
|
'Gammad;'=>'Ϝ',
|
|
'Gbreve;'=>'Ğ',
|
|
'Gcedil;'=>'Ģ',
|
|
'Gcirc;'=>'Ĝ',
|
|
'Gcy;'=>'Г',
|
|
'Gdot;'=>'Ġ',
|
|
'Gfr;'=>'픊',
|
|
'Gg;'=>'⋙',
|
|
'Gopf;'=>'픾',
|
|
'GreaterEqual;'=>'≥',
|
|
'GreaterEqualLess;'=>'⋛',
|
|
'GreaterFullEqual;'=>'≧',
|
|
'GreaterGreater;'=>'⪢',
|
|
'GreaterLess;'=>'≷',
|
|
'GreaterSlantEqual;'=>'⩾',
|
|
'GreaterTilde;'=>'≳',
|
|
'Gscr;'=>'풢',
|
|
'Gt;'=>'≫',
|
|
'HARDcy;'=>'Ъ',
|
|
'Hacek;'=>'ˇ',
|
|
'Hat;'=>'^',
|
|
'Hcirc;'=>'Ĥ',
|
|
'Hfr;'=>'ℌ',
|
|
'HilbertSpace;'=>'ℋ',
|
|
'Hopf;'=>'ℍ',
|
|
'HorizontalLine;'=>'─',
|
|
'Hscr;'=>'ℋ',
|
|
'Hstrok;'=>'Ħ',
|
|
'HumpDownHump;'=>'≎',
|
|
'HumpEqual;'=>'≏',
|
|
'IEcy;'=>'Е',
|
|
'IJlig;'=>'IJ',
|
|
'IOcy;'=>'Ё',
|
|
'Iacute'=>'Í',
|
|
'Iacute;'=>'Í',
|
|
'Icirc'=>'Î',
|
|
'Icirc;'=>'Î',
|
|
'Icy;'=>'И',
|
|
'Idot;'=>'İ',
|
|
'Ifr;'=>'ℑ',
|
|
'Igrave'=>'Ì',
|
|
'Igrave;'=>'Ì',
|
|
'Im;'=>'ℑ',
|
|
'Imacr;'=>'Ī',
|
|
'ImaginaryI;'=>'ⅈ',
|
|
'Implies;'=>'⇒',
|
|
'Int;'=>'∬',
|
|
'Integral;'=>'∫',
|
|
'Intersection;'=>'⋂',
|
|
'InvisibleComma;'=>'',
|
|
'InvisibleTimes;'=>'',
|
|
'Iogon;'=>'Į',
|
|
'Iopf;'=>'핀',
|
|
'Iota;'=>'Ι',
|
|
'Iscr;'=>'ℐ',
|
|
'Itilde;'=>'Ĩ',
|
|
'Iukcy;'=>'І',
|
|
'Iuml'=>'Ï',
|
|
'Iuml;'=>'Ï',
|
|
'Jcirc;'=>'Ĵ',
|
|
'Jcy;'=>'Й',
|
|
'Jfr;'=>'픍',
|
|
'Jopf;'=>'핁',
|
|
'Jscr;'=>'풥',
|
|
'Jsercy;'=>'Ј',
|
|
'Jukcy;'=>'Є',
|
|
'KHcy;'=>'Х',
|
|
'KJcy;'=>'Ќ',
|
|
'Kappa;'=>'Κ',
|
|
'Kcedil;'=>'Ķ',
|
|
'Kcy;'=>'К',
|
|
'Kfr;'=>'픎',
|
|
'Kopf;'=>'핂',
|
|
'Kscr;'=>'풦',
|
|
'LJcy;'=>'Љ',
|
|
'LT'=>'<',
|
|
'LT;'=>'<',
|
|
'Lacute;'=>'Ĺ',
|
|
'Lambda;'=>'Λ',
|
|
'Lang;'=>'⟪',
|
|
'Laplacetrf;'=>'ℒ',
|
|
'Larr;'=>'↞',
|
|
'Lcaron;'=>'Ľ',
|
|
'Lcedil;'=>'Ļ',
|
|
'Lcy;'=>'Л',
|
|
'LeftAngleBracket;'=>'⟨',
|
|
'LeftArrow;'=>'←',
|
|
'LeftArrowBar;'=>'⇤',
|
|
'LeftArrowRightArrow;'=>'⇆',
|
|
'LeftCeiling;'=>'⌈',
|
|
'LeftDoubleBracket;'=>'⟦',
|
|
'LeftDownTeeVector;'=>'⥡',
|
|
'LeftDownVector;'=>'⇃',
|
|
'LeftDownVectorBar;'=>'⥙',
|
|
'LeftFloor;'=>'⌊',
|
|
'LeftRightArrow;'=>'↔',
|
|
'LeftRightVector;'=>'⥎',
|
|
'LeftTee;'=>'⊣',
|
|
'LeftTeeArrow;'=>'↤',
|
|
'LeftTeeVector;'=>'⥚',
|
|
'LeftTriangle;'=>'⊲',
|
|
'LeftTriangleBar;'=>'⧏',
|
|
'LeftTriangleEqual;'=>'⊴',
|
|
'LeftUpDownVector;'=>'⥑',
|
|
'LeftUpTeeVector;'=>'⥠',
|
|
'LeftUpVector;'=>'↿',
|
|
'LeftUpVectorBar;'=>'⥘',
|
|
'LeftVector;'=>'↼',
|
|
'LeftVectorBar;'=>'⥒',
|
|
'Leftarrow;'=>'⇐',
|
|
'Leftrightarrow;'=>'⇔',
|
|
'LessEqualGreater;'=>'⋚',
|
|
'LessFullEqual;'=>'≦',
|
|
'LessGreater;'=>'≶',
|
|
'LessLess;'=>'⪡',
|
|
'LessSlantEqual;'=>'⩽',
|
|
'LessTilde;'=>'≲',
|
|
'Lfr;'=>'픏',
|
|
'Ll;'=>'⋘',
|
|
'Lleftarrow;'=>'⇚',
|
|
'Lmidot;'=>'Ŀ',
|
|
'LongLeftArrow;'=>'⟵',
|
|
'LongLeftRightArrow;'=>'⟷',
|
|
'LongRightArrow;'=>'⟶',
|
|
'Longleftarrow;'=>'⟸',
|
|
'Longleftrightarrow;'=>'⟺',
|
|
'Longrightarrow;'=>'⟹',
|
|
'Lopf;'=>'핃',
|
|
'LowerLeftArrow;'=>'↙',
|
|
'LowerRightArrow;'=>'↘',
|
|
'Lscr;'=>'ℒ',
|
|
'Lsh;'=>'↰',
|
|
'Lstrok;'=>'Ł',
|
|
'Lt;'=>'≪',
|
|
'Map;'=>'⤅',
|
|
'Mcy;'=>'М',
|
|
'MediumSpace;'=>' ',
|
|
'Mellintrf;'=>'ℳ',
|
|
'Mfr;'=>'픐',
|
|
'MinusPlus;'=>'∓',
|
|
'Mopf;'=>'필',
|
|
'Mscr;'=>'ℳ',
|
|
'Mu;'=>'Μ',
|
|
'NJcy;'=>'Њ',
|
|
'Nacute;'=>'Ń',
|
|
'Ncaron;'=>'Ň',
|
|
'Ncedil;'=>'Ņ',
|
|
'Ncy;'=>'Н',
|
|
'NegativeMediumSpace;'=>'',
|
|
'NegativeThickSpace;'=>'',
|
|
'NegativeThinSpace;'=>'',
|
|
'NegativeVeryThinSpace;'=>'',
|
|
'NestedGreaterGreater;'=>'≫',
|
|
'NestedLessLess;'=>'≪',
|
|
'NewLine;'=>'
|
|
','Nfr;'=>'픑',
|
|
'NoBreak;'=>'',
|
|
'NonBreakingSpace;'=>' ',
|
|
'Nopf;'=>'ℕ',
|
|
'Not;'=>'⫬',
|
|
'NotCongruent;'=>'≢',
|
|
'NotCupCap;'=>'≭',
|
|
'NotDoubleVerticalBar;'=>'∦',
|
|
'NotElement;'=>'∉',
|
|
'NotEqual;'=>'≠',
|
|
'NotEqualTilde;'=>'≂̸',
|
|
'NotExists;'=>'∄',
|
|
'NotGreater;'=>'≯',
|
|
'NotGreaterEqual;'=>'≱',
|
|
'NotGreaterFullEqual;'=>'≧̸',
|
|
'NotGreaterGreater;'=>'≫̸',
|
|
'NotGreaterLess;'=>'≹',
|
|
'NotGreaterSlantEqual;'=>'⩾̸',
|
|
'NotGreaterTilde;'=>'≵',
|
|
'NotHumpDownHump;'=>'≎̸',
|
|
'NotHumpEqual;'=>'≏̸',
|
|
'NotLeftTriangle;'=>'⋪',
|
|
'NotLeftTriangleBar;'=>'⧏̸',
|
|
'NotLeftTriangleEqual;'=>'⋬',
|
|
'NotLess;'=>'≮',
|
|
'NotLessEqual;'=>'≰',
|
|
'NotLessGreater;'=>'≸',
|
|
'NotLessLess;'=>'≪̸',
|
|
'NotLessSlantEqual;'=>'⩽̸',
|
|
'NotLessTilde;'=>'≴',
|
|
'NotNestedGreaterGreater;'=>'⪢̸',
|
|
'NotNestedLessLess;'=>'⪡̸',
|
|
'NotPrecedes;'=>'⊀',
|
|
'NotPrecedesEqual;'=>'⪯̸',
|
|
'NotPrecedesSlantEqual;'=>'⋠',
|
|
'NotReverseElement;'=>'∌',
|
|
'NotRightTriangle;'=>'⋫',
|
|
'NotRightTriangleBar;'=>'⧐̸',
|
|
'NotRightTriangleEqual;'=>'⋭',
|
|
'NotSquareSubset;'=>'⊏̸',
|
|
'NotSquareSubsetEqual;'=>'⋢',
|
|
'NotSquareSuperset;'=>'⊐̸',
|
|
'NotSquareSupersetEqual;'=>'⋣',
|
|
'NotSubset;'=>'⊂⃒',
|
|
'NotSubsetEqual;'=>'⊈',
|
|
'NotSucceeds;'=>'⊁',
|
|
'NotSucceedsEqual;'=>'⪰̸',
|
|
'NotSucceedsSlantEqual;'=>'⋡',
|
|
'NotSucceedsTilde;'=>'≿̸',
|
|
'NotSuperset;'=>'⊃⃒',
|
|
'NotSupersetEqual;'=>'⊉',
|
|
'NotTilde;'=>'≁',
|
|
'NotTildeEqual;'=>'≄',
|
|
'NotTildeFullEqual;'=>'≇',
|
|
'NotTildeTilde;'=>'≉',
|
|
'NotVerticalBar;'=>'∤',
|
|
'Nscr;'=>'풩',
|
|
'Ntilde'=>'Ñ',
|
|
'Ntilde;'=>'Ñ',
|
|
'Nu;'=>'Ν',
|
|
'OElig;'=>'Œ',
|
|
'Oacute'=>'Ó',
|
|
'Oacute;'=>'Ó',
|
|
'Ocirc'=>'Ô',
|
|
'Ocirc;'=>'Ô',
|
|
'Ocy;'=>'О',
|
|
'Odblac;'=>'Ő',
|
|
'Ofr;'=>'픒',
|
|
'Ograve'=>'Ò',
|
|
'Ograve;'=>'Ò',
|
|
'Omacr;'=>'Ō',
|
|
'Omega;'=>'Ω',
|
|
'Omicron;'=>'Ο',
|
|
'Oopf;'=>'핆',
|
|
'OpenCurlyDoubleQuote;'=>'“',
|
|
'OpenCurlyQuote;'=>'‘',
|
|
'Or;'=>'⩔',
|
|
'Oscr;'=>'풪',
|
|
'Oslash'=>'Ø',
|
|
'Oslash;'=>'Ø',
|
|
'Otilde'=>'Õ',
|
|
'Otilde;'=>'Õ',
|
|
'Otimes;'=>'⨷',
|
|
'Ouml'=>'Ö',
|
|
'Ouml;'=>'Ö',
|
|
'OverBar;'=>'‾',
|
|
'OverBrace;'=>'⏞',
|
|
'OverBracket;'=>'⎴',
|
|
'OverParenthesis;'=>'⏜',
|
|
'PartialD;'=>'∂',
|
|
'Pcy;'=>'П',
|
|
'Pfr;'=>'픓',
|
|
'Phi;'=>'Φ',
|
|
'Pi;'=>'Π',
|
|
'PlusMinus;'=>'±',
|
|
'Poincareplane;'=>'ℌ',
|
|
'Popf;'=>'ℙ',
|
|
'Pr;'=>'⪻',
|
|
'Precedes;'=>'≺',
|
|
'PrecedesEqual;'=>'⪯',
|
|
'PrecedesSlantEqual;'=>'≼',
|
|
'PrecedesTilde;'=>'≾',
|
|
'Prime;'=>'″',
|
|
'Product;'=>'∏',
|
|
'Proportion;'=>'∷',
|
|
'Proportional;'=>'∝',
|
|
'Pscr;'=>'풫',
|
|
'Psi;'=>'Ψ',
|
|
'QUOT'=>'"',
|
|
'QUOT;'=>'"',
|
|
'Qfr;'=>'픔',
|
|
'Qopf;'=>'ℚ',
|
|
'Qscr;'=>'풬',
|
|
'RBarr;'=>'⤐',
|
|
'REG'=>'®',
|
|
'REG;'=>'®',
|
|
'Racute;'=>'Ŕ',
|
|
'Rang;'=>'⟫',
|
|
'Rarr;'=>'↠',
|
|
'Rarrtl;'=>'⤖',
|
|
'Rcaron;'=>'Ř',
|
|
'Rcedil;'=>'Ŗ',
|
|
'Rcy;'=>'Р',
|
|
'Re;'=>'ℜ',
|
|
'ReverseElement;'=>'∋',
|
|
'ReverseEquilibrium;'=>'⇋',
|
|
'ReverseUpEquilibrium;'=>'⥯',
|
|
'Rfr;'=>'ℜ',
|
|
'Rho;'=>'Ρ',
|
|
'RightAngleBracket;'=>'⟩',
|
|
'RightArrow;'=>'→',
|
|
'RightArrowBar;'=>'⇥',
|
|
'RightArrowLeftArrow;'=>'⇄',
|
|
'RightCeiling;'=>'⌉',
|
|
'RightDoubleBracket;'=>'⟧',
|
|
'RightDownTeeVector;'=>'⥝',
|
|
'RightDownVector;'=>'⇂',
|
|
'RightDownVectorBar;'=>'⥕',
|
|
'RightFloor;'=>'⌋',
|
|
'RightTee;'=>'⊢',
|
|
'RightTeeArrow;'=>'↦',
|
|
'RightTeeVector;'=>'⥛',
|
|
'RightTriangle;'=>'⊳',
|
|
'RightTriangleBar;'=>'⧐',
|
|
'RightTriangleEqual;'=>'⊵',
|
|
'RightUpDownVector;'=>'⥏',
|
|
'RightUpTeeVector;'=>'⥜',
|
|
'RightUpVector;'=>'↾',
|
|
'RightUpVectorBar;'=>'⥔',
|
|
'RightVector;'=>'⇀',
|
|
'RightVectorBar;'=>'⥓',
|
|
'Rightarrow;'=>'⇒',
|
|
'Ropf;'=>'ℝ',
|
|
'RoundImplies;'=>'⥰',
|
|
'Rrightarrow;'=>'⇛',
|
|
'Rscr;'=>'ℛ',
|
|
'Rsh;'=>'↱',
|
|
'RuleDelayed;'=>'⧴',
|
|
'SHCHcy;'=>'Щ',
|
|
'SHcy;'=>'Ш',
|
|
'SOFTcy;'=>'Ь',
|
|
'Sacute;'=>'Ś',
|
|
'Sc;'=>'⪼',
|
|
'Scaron;'=>'Š',
|
|
'Scedil;'=>'Ş',
|
|
'Scirc;'=>'Ŝ',
|
|
'Scy;'=>'С',
|
|
'Sfr;'=>'픖',
|
|
'ShortDownArrow;'=>'↓',
|
|
'ShortLeftArrow;'=>'←',
|
|
'ShortRightArrow;'=>'→',
|
|
'ShortUpArrow;'=>'↑',
|
|
'Sigma;'=>'Σ',
|
|
'SmallCircle;'=>'∘',
|
|
'Sopf;'=>'핊',
|
|
'Sqrt;'=>'√',
|
|
'Square;'=>'□',
|
|
'SquareIntersection;'=>'⊓',
|
|
'SquareSubset;'=>'⊏',
|
|
'SquareSubsetEqual;'=>'⊑',
|
|
'SquareSuperset;'=>'⊐',
|
|
'SquareSupersetEqual;'=>'⊒',
|
|
'SquareUnion;'=>'⊔',
|
|
'Sscr;'=>'풮',
|
|
'Star;'=>'⋆',
|
|
'Sub;'=>'⋐',
|
|
'Subset;'=>'⋐',
|
|
'SubsetEqual;'=>'⊆',
|
|
'Succeeds;'=>'≻',
|
|
'SucceedsEqual;'=>'⪰',
|
|
'SucceedsSlantEqual;'=>'≽',
|
|
'SucceedsTilde;'=>'≿',
|
|
'SuchThat;'=>'∋',
|
|
'Sum;'=>'∑',
|
|
'Sup;'=>'⋑',
|
|
'Superset;'=>'⊃',
|
|
'SupersetEqual;'=>'⊇',
|
|
'Supset;'=>'⋑',
|
|
'THORN'=>'Þ',
|
|
'THORN;'=>'Þ',
|
|
'TRADE;'=>'™',
|
|
'TSHcy;'=>'Ћ',
|
|
'TScy;'=>'Ц',
|
|
'Tab;'=>' ',
|
|
'Tau;'=>'Τ',
|
|
'Tcaron;'=>'Ť',
|
|
'Tcedil;'=>'Ţ',
|
|
'Tcy;'=>'Т',
|
|
'Tfr;'=>'픗',
|
|
'Therefore;'=>'∴',
|
|
'Theta;'=>'Θ',
|
|
'ThickSpace;'=>' ',
|
|
'ThinSpace;'=>' ',
|
|
'Tilde;'=>'∼',
|
|
'TildeEqual;'=>'≃',
|
|
'TildeFullEqual;'=>'≅',
|
|
'TildeTilde;'=>'≈',
|
|
'Topf;'=>'핋',
|
|
'TripleDot;'=>'⃛',
|
|
'Tscr;'=>'풯',
|
|
'Tstrok;'=>'Ŧ',
|
|
'Uacute'=>'Ú',
|
|
'Uacute;'=>'Ú',
|
|
'Uarr;'=>'↟',
|
|
'Uarrocir;'=>'⥉',
|
|
'Ubrcy;'=>'Ў',
|
|
'Ubreve;'=>'Ŭ',
|
|
'Ucirc'=>'Û',
|
|
'Ucirc;'=>'Û',
|
|
'Ucy;'=>'У',
|
|
'Udblac;'=>'Ű',
|
|
'Ufr;'=>'픘',
|
|
'Ugrave'=>'Ù',
|
|
'Ugrave;'=>'Ù',
|
|
'Umacr;'=>'Ū',
|
|
'UnderBar;'=>'_',
|
|
'UnderBrace;'=>'⏟',
|
|
'UnderBracket;'=>'⎵',
|
|
'UnderParenthesis;'=>'⏝',
|
|
'Union;'=>'⋃',
|
|
'UnionPlus;'=>'⊎',
|
|
'Uogon;'=>'Ų',
|
|
'Uopf;'=>'핌',
|
|
'UpArrow;'=>'↑',
|
|
'UpArrowBar;'=>'⤒',
|
|
'UpArrowDownArrow;'=>'⇅',
|
|
'UpDownArrow;'=>'↕',
|
|
'UpEquilibrium;'=>'⥮',
|
|
'UpTee;'=>'⊥',
|
|
'UpTeeArrow;'=>'↥',
|
|
'Uparrow;'=>'⇑',
|
|
'Updownarrow;'=>'⇕',
|
|
'UpperLeftArrow;'=>'↖',
|
|
'UpperRightArrow;'=>'↗',
|
|
'Upsi;'=>'ϒ',
|
|
'Upsilon;'=>'Υ',
|
|
'Uring;'=>'Ů',
|
|
'Uscr;'=>'풰',
|
|
'Utilde;'=>'Ũ',
|
|
'Uuml'=>'Ü',
|
|
'Uuml;'=>'Ü',
|
|
'VDash;'=>'⊫',
|
|
'Vbar;'=>'⫫',
|
|
'Vcy;'=>'В',
|
|
'Vdash;'=>'⊩',
|
|
'Vdashl;'=>'⫦',
|
|
'Vee;'=>'⋁',
|
|
'Verbar;'=>'‖',
|
|
'Vert;'=>'‖',
|
|
'VerticalBar;'=>'∣',
|
|
'VerticalLine;'=>'|',
|
|
'VerticalSeparator;'=>'❘',
|
|
'VerticalTilde;'=>'≀',
|
|
'VeryThinSpace;'=>' ',
|
|
'Vfr;'=>'픙',
|
|
'Vopf;'=>'핍',
|
|
'Vscr;'=>'풱',
|
|
'Vvdash;'=>'⊪',
|
|
'Wcirc;'=>'Ŵ',
|
|
'Wedge;'=>'⋀',
|
|
'Wfr;'=>'픚',
|
|
'Wopf;'=>'핎',
|
|
'Wscr;'=>'풲',
|
|
'Xfr;'=>'픛',
|
|
'Xi;'=>'Ξ',
|
|
'Xopf;'=>'핏',
|
|
'Xscr;'=>'풳',
|
|
'YAcy;'=>'Я',
|
|
'YIcy;'=>'Ї',
|
|
'YUcy;'=>'Ю',
|
|
'Yacute'=>'Ý',
|
|
'Yacute;'=>'Ý',
|
|
'Ycirc;'=>'Ŷ',
|
|
'Ycy;'=>'Ы',
|
|
'Yfr;'=>'픜',
|
|
'Yopf;'=>'핐',
|
|
'Yscr;'=>'풴',
|
|
'Yuml;'=>'Ÿ',
|
|
'ZHcy;'=>'Ж',
|
|
'Zacute;'=>'Ź',
|
|
'Zcaron;'=>'Ž',
|
|
'Zcy;'=>'З',
|
|
'Zdot;'=>'Ż',
|
|
'ZeroWidthSpace;'=>'',
|
|
'Zeta;'=>'Ζ',
|
|
'Zfr;'=>'ℨ',
|
|
'Zopf;'=>'ℤ',
|
|
'Zscr;'=>'풵',
|
|
'aacute'=>'á',
|
|
'aacute;'=>'á',
|
|
'abreve;'=>'ă',
|
|
'ac;'=>'∾',
|
|
'acE;'=>'∾̳',
|
|
'acd;'=>'∿',
|
|
'acirc'=>'â',
|
|
'acirc;'=>'â',
|
|
'acute'=>'´',
|
|
'acute;'=>'´',
|
|
'acy;'=>'а',
|
|
'aelig'=>'æ',
|
|
'aelig;'=>'æ',
|
|
'af;'=>'',
|
|
'afr;'=>'픞',
|
|
'agrave'=>'à',
|
|
'agrave;'=>'à',
|
|
'alefsym;'=>'ℵ',
|
|
'aleph;'=>'ℵ',
|
|
'alpha;'=>'α',
|
|
'amacr;'=>'ā',
|
|
'amalg;'=>'⨿',
|
|
'amp'=>'&',
|
|
'amp;'=>'&',
|
|
'and;'=>'∧',
|
|
'andand;'=>'⩕',
|
|
'andd;'=>'⩜',
|
|
'andslope;'=>'⩘',
|
|
'andv;'=>'⩚',
|
|
'ang;'=>'∠',
|
|
'ange;'=>'⦤',
|
|
'angle;'=>'∠',
|
|
'angmsd;'=>'∡',
|
|
'angmsdaa;'=>'⦨',
|
|
'angmsdab;'=>'⦩',
|
|
'angmsdac;'=>'⦪',
|
|
'angmsdad;'=>'⦫',
|
|
'angmsdae;'=>'⦬',
|
|
'angmsdaf;'=>'⦭',
|
|
'angmsdag;'=>'⦮',
|
|
'angmsdah;'=>'⦯',
|
|
'angrt;'=>'∟',
|
|
'angrtvb;'=>'⊾',
|
|
'angrtvbd;'=>'⦝',
|
|
'angsph;'=>'∢',
|
|
'angst;'=>'Å',
|
|
'angzarr;'=>'⍼',
|
|
'aogon;'=>'ą',
|
|
'aopf;'=>'핒',
|
|
'ap;'=>'≈',
|
|
'apE;'=>'⩰',
|
|
'apacir;'=>'⩯',
|
|
'ape;'=>'≊',
|
|
'apid;'=>'≋',
|
|
'apos;'=>'\'',
|
|
'approx;'=>'≈',
|
|
'approxeq;'=>'≊',
|
|
'aring'=>'å',
|
|
'aring;'=>'å',
|
|
'ascr;'=>'풶',
|
|
'ast;'=>'*',
|
|
'asymp;'=>'≈',
|
|
'asympeq;'=>'≍',
|
|
'atilde'=>'ã',
|
|
'atilde;'=>'ã',
|
|
'auml'=>'ä',
|
|
'auml;'=>'ä',
|
|
'awconint;'=>'∳',
|
|
'awint;'=>'⨑',
|
|
'bNot;'=>'⫭',
|
|
'backcong;'=>'≌',
|
|
'backepsilon;'=>'϶',
|
|
'backprime;'=>'‵',
|
|
'backsim;'=>'∽',
|
|
'backsimeq;'=>'⋍',
|
|
'barvee;'=>'⊽',
|
|
'barwed;'=>'⌅',
|
|
'barwedge;'=>'⌅',
|
|
'bbrk;'=>'⎵',
|
|
'bbrktbrk;'=>'⎶',
|
|
'bcong;'=>'≌',
|
|
'bcy;'=>'б',
|
|
'bdquo;'=>'„',
|
|
'becaus;'=>'∵',
|
|
'because;'=>'∵',
|
|
'bemptyv;'=>'⦰',
|
|
'bepsi;'=>'϶',
|
|
'bernou;'=>'ℬ',
|
|
'beta;'=>'β',
|
|
'beth;'=>'ℶ',
|
|
'between;'=>'≬',
|
|
'bfr;'=>'픟',
|
|
'bigcap;'=>'⋂',
|
|
'bigcirc;'=>'◯',
|
|
'bigcup;'=>'⋃',
|
|
'bigodot;'=>'⨀',
|
|
'bigoplus;'=>'⨁',
|
|
'bigotimes;'=>'⨂',
|
|
'bigsqcup;'=>'⨆',
|
|
'bigstar;'=>'★',
|
|
'bigtriangledown;'=>'▽',
|
|
'bigtriangleup;'=>'△',
|
|
'biguplus;'=>'⨄',
|
|
'bigvee;'=>'⋁',
|
|
'bigwedge;'=>'⋀',
|
|
'bkarow;'=>'⤍',
|
|
'blacklozenge;'=>'⧫',
|
|
'blacksquare;'=>'▪',
|
|
'blacktriangle;'=>'▴',
|
|
'blacktriangledown;'=>'▾',
|
|
'blacktriangleleft;'=>'◂',
|
|
'blacktriangleright;'=>'▸',
|
|
'blank;'=>'␣',
|
|
'blk12;'=>'▒',
|
|
'blk14;'=>'░',
|
|
'blk34;'=>'▓',
|
|
'block;'=>'█',
|
|
'bne;'=>'=⃥',
|
|
'bnequiv;'=>'≡⃥',
|
|
'bnot;'=>'⌐',
|
|
'bopf;'=>'핓',
|
|
'bot;'=>'⊥',
|
|
'bottom;'=>'⊥',
|
|
'bowtie;'=>'⋈',
|
|
'boxDL;'=>'╗',
|
|
'boxDR;'=>'╔',
|
|
'boxDl;'=>'╖',
|
|
'boxDr;'=>'╓',
|
|
'boxH;'=>'═',
|
|
'boxHD;'=>'╦',
|
|
'boxHU;'=>'╩',
|
|
'boxHd;'=>'╤',
|
|
'boxHu;'=>'╧',
|
|
'boxUL;'=>'╝',
|
|
'boxUR;'=>'╚',
|
|
'boxUl;'=>'╜',
|
|
'boxUr;'=>'╙',
|
|
'boxV;'=>'║',
|
|
'boxVH;'=>'╬',
|
|
'boxVL;'=>'╣',
|
|
'boxVR;'=>'╠',
|
|
'boxVh;'=>'╫',
|
|
'boxVl;'=>'╢',
|
|
'boxVr;'=>'╟',
|
|
'boxbox;'=>'⧉',
|
|
'boxdL;'=>'╕',
|
|
'boxdR;'=>'╒',
|
|
'boxdl;'=>'┐',
|
|
'boxdr;'=>'┌',
|
|
'boxh;'=>'─',
|
|
'boxhD;'=>'╥',
|
|
'boxhU;'=>'╨',
|
|
'boxhd;'=>'┬',
|
|
'boxhu;'=>'┴',
|
|
'boxminus;'=>'⊟',
|
|
'boxplus;'=>'⊞',
|
|
'boxtimes;'=>'⊠',
|
|
'boxuL;'=>'╛',
|
|
'boxuR;'=>'╘',
|
|
'boxul;'=>'┘',
|
|
'boxur;'=>'└',
|
|
'boxv;'=>'│',
|
|
'boxvH;'=>'╪',
|
|
'boxvL;'=>'╡',
|
|
'boxvR;'=>'╞',
|
|
'boxvh;'=>'┼',
|
|
'boxvl;'=>'┤',
|
|
'boxvr;'=>'├',
|
|
'bprime;'=>'‵',
|
|
'breve;'=>'˘',
|
|
'brvbar'=>'¦',
|
|
'brvbar;'=>'¦',
|
|
'bscr;'=>'풷',
|
|
'bsemi;'=>'⁏',
|
|
'bsim;'=>'∽',
|
|
'bsime;'=>'⋍',
|
|
'bsol;'=>'\\',
|
|
'bsolb;'=>'⧅',
|
|
'bsolhsub;'=>'⟈',
|
|
'bull;'=>'•',
|
|
'bullet;'=>'•',
|
|
'bump;'=>'≎',
|
|
'bumpE;'=>'⪮',
|
|
'bumpe;'=>'≏',
|
|
'bumpeq;'=>'≏',
|
|
'cacute;'=>'ć',
|
|
'cap;'=>'∩',
|
|
'capand;'=>'⩄',
|
|
'capbrcup;'=>'⩉',
|
|
'capcap;'=>'⩋',
|
|
'capcup;'=>'⩇',
|
|
'capdot;'=>'⩀',
|
|
'caps;'=>'∩︀',
|
|
'caret;'=>'⁁',
|
|
'caron;'=>'ˇ',
|
|
'ccaps;'=>'⩍',
|
|
'ccaron;'=>'č',
|
|
'ccedil'=>'ç',
|
|
'ccedil;'=>'ç',
|
|
'ccirc;'=>'ĉ',
|
|
'ccups;'=>'⩌',
|
|
'ccupssm;'=>'⩐',
|
|
'cdot;'=>'ċ',
|
|
'cedil'=>'¸',
|
|
'cedil;'=>'¸',
|
|
'cemptyv;'=>'⦲',
|
|
'cent'=>'¢',
|
|
'cent;'=>'¢',
|
|
'centerdot;'=>'·',
|
|
'cfr;'=>'픠',
|
|
'chcy;'=>'ч',
|
|
'check;'=>'✓',
|
|
'checkmark;'=>'✓',
|
|
'chi;'=>'χ',
|
|
'cir;'=>'○',
|
|
'cirE;'=>'⧃',
|
|
'circ;'=>'ˆ',
|
|
'circeq;'=>'≗',
|
|
'circlearrowleft;'=>'↺',
|
|
'circlearrowright;'=>'↻',
|
|
'circledR;'=>'®',
|
|
'circledS;'=>'Ⓢ',
|
|
'circledast;'=>'⊛',
|
|
'circledcirc;'=>'⊚',
|
|
'circleddash;'=>'⊝',
|
|
'cire;'=>'≗',
|
|
'cirfnint;'=>'⨐',
|
|
'cirmid;'=>'⫯',
|
|
'cirscir;'=>'⧂',
|
|
'clubs;'=>'♣',
|
|
'clubsuit;'=>'♣',
|
|
'colon;'=>':',
|
|
'colone;'=>'≔',
|
|
'coloneq;'=>'≔',
|
|
'comma;'=>',',
|
|
'commat;'=>'@',
|
|
'comp;'=>'∁',
|
|
'compfn;'=>'∘',
|
|
'complement;'=>'∁',
|
|
'complexes;'=>'ℂ',
|
|
'cong;'=>'≅',
|
|
'congdot;'=>'⩭',
|
|
'conint;'=>'∮',
|
|
'copf;'=>'핔',
|
|
'coprod;'=>'∐',
|
|
'copy'=>'©',
|
|
'copy;'=>'©',
|
|
'copysr;'=>'℗',
|
|
'crarr;'=>'↵',
|
|
'cross;'=>'✗',
|
|
'cscr;'=>'풸',
|
|
'csub;'=>'⫏',
|
|
'csube;'=>'⫑',
|
|
'csup;'=>'⫐',
|
|
'csupe;'=>'⫒',
|
|
'ctdot;'=>'⋯',
|
|
'cudarrl;'=>'⤸',
|
|
'cudarrr;'=>'⤵',
|
|
'cuepr;'=>'⋞',
|
|
'cuesc;'=>'⋟',
|
|
'cularr;'=>'↶',
|
|
'cularrp;'=>'⤽',
|
|
'cup;'=>'∪',
|
|
'cupbrcap;'=>'⩈',
|
|
'cupcap;'=>'⩆',
|
|
'cupcup;'=>'⩊',
|
|
'cupdot;'=>'⊍',
|
|
'cupor;'=>'⩅',
|
|
'cups;'=>'∪︀',
|
|
'curarr;'=>'↷',
|
|
'curarrm;'=>'⤼',
|
|
'curlyeqprec;'=>'⋞',
|
|
'curlyeqsucc;'=>'⋟',
|
|
'curlyvee;'=>'⋎',
|
|
'curlywedge;'=>'⋏',
|
|
'curren'=>'¤',
|
|
'curren;'=>'¤',
|
|
'curvearrowleft;'=>'↶',
|
|
'curvearrowright;'=>'↷',
|
|
'cuvee;'=>'⋎',
|
|
'cuwed;'=>'⋏',
|
|
'cwconint;'=>'∲',
|
|
'cwint;'=>'∱',
|
|
'cylcty;'=>'⌭',
|
|
'dArr;'=>'⇓',
|
|
'dHar;'=>'⥥',
|
|
'dagger;'=>'†',
|
|
'daleth;'=>'ℸ',
|
|
'darr;'=>'↓',
|
|
'dash;'=>'‐',
|
|
'dashv;'=>'⊣',
|
|
'dbkarow;'=>'⤏',
|
|
'dblac;'=>'˝',
|
|
'dcaron;'=>'ď',
|
|
'dcy;'=>'д',
|
|
'dd;'=>'ⅆ',
|
|
'ddagger;'=>'‡',
|
|
'ddarr;'=>'⇊',
|
|
'ddotseq;'=>'⩷',
|
|
'deg'=>'°',
|
|
'deg;'=>'°',
|
|
'delta;'=>'δ',
|
|
'demptyv;'=>'⦱',
|
|
'dfisht;'=>'⥿',
|
|
'dfr;'=>'픡',
|
|
'dharl;'=>'⇃',
|
|
'dharr;'=>'⇂',
|
|
'diam;'=>'⋄',
|
|
'diamond;'=>'⋄',
|
|
'diamondsuit;'=>'♦',
|
|
'diams;'=>'♦',
|
|
'die;'=>'¨',
|
|
'digamma;'=>'ϝ',
|
|
'disin;'=>'⋲',
|
|
'div;'=>'÷',
|
|
'divide'=>'÷',
|
|
'divide;'=>'÷',
|
|
'divideontimes;'=>'⋇',
|
|
'divonx;'=>'⋇',
|
|
'djcy;'=>'ђ',
|
|
'dlcorn;'=>'⌞',
|
|
'dlcrop;'=>'⌍',
|
|
'dollar;'=>'$',
|
|
'dopf;'=>'핕',
|
|
'dot;'=>'˙',
|
|
'doteq;'=>'≐',
|
|
'doteqdot;'=>'≑',
|
|
'dotminus;'=>'∸',
|
|
'dotplus;'=>'∔',
|
|
'dotsquare;'=>'⊡',
|
|
'doublebarwedge;'=>'⌆',
|
|
'downarrow;'=>'↓',
|
|
'downdownarrows;'=>'⇊',
|
|
'downharpoonleft;'=>'⇃',
|
|
'downharpoonright;'=>'⇂',
|
|
'drbkarow;'=>'⤐',
|
|
'drcorn;'=>'⌟',
|
|
'drcrop;'=>'⌌',
|
|
'dscr;'=>'풹',
|
|
'dscy;'=>'ѕ',
|
|
'dsol;'=>'⧶',
|
|
'dstrok;'=>'đ',
|
|
'dtdot;'=>'⋱',
|
|
'dtri;'=>'▿',
|
|
'dtrif;'=>'▾',
|
|
'duarr;'=>'⇵',
|
|
'duhar;'=>'⥯',
|
|
'dwangle;'=>'⦦',
|
|
'dzcy;'=>'џ',
|
|
'dzigrarr;'=>'⟿',
|
|
'eDDot;'=>'⩷',
|
|
'eDot;'=>'≑',
|
|
'eacute'=>'é',
|
|
'eacute;'=>'é',
|
|
'easter;'=>'⩮',
|
|
'ecaron;'=>'ě',
|
|
'ecir;'=>'≖',
|
|
'ecirc'=>'ê',
|
|
'ecirc;'=>'ê',
|
|
'ecolon;'=>'≕',
|
|
'ecy;'=>'э',
|
|
'edot;'=>'ė',
|
|
'ee;'=>'ⅇ',
|
|
'efDot;'=>'≒',
|
|
'efr;'=>'픢',
|
|
'eg;'=>'⪚',
|
|
'egrave'=>'è',
|
|
'egrave;'=>'è',
|
|
'egs;'=>'⪖',
|
|
'egsdot;'=>'⪘',
|
|
'el;'=>'⪙',
|
|
'elinters;'=>'⏧',
|
|
'ell;'=>'ℓ',
|
|
'els;'=>'⪕',
|
|
'elsdot;'=>'⪗',
|
|
'emacr;'=>'ē',
|
|
'empty;'=>'∅',
|
|
'emptyset;'=>'∅',
|
|
'emptyv;'=>'∅',
|
|
'emsp13;'=>' ',
|
|
'emsp14;'=>' ',
|
|
'emsp;'=>' ',
|
|
'eng;'=>'ŋ',
|
|
'ensp;'=>' ',
|
|
'eogon;'=>'ę',
|
|
'eopf;'=>'핖',
|
|
'epar;'=>'⋕',
|
|
'eparsl;'=>'⧣',
|
|
'eplus;'=>'⩱',
|
|
'epsi;'=>'ε',
|
|
'epsilon;'=>'ε',
|
|
'epsiv;'=>'ϵ',
|
|
'eqcirc;'=>'≖',
|
|
'eqcolon;'=>'≕',
|
|
'eqsim;'=>'≂',
|
|
'eqslantgtr;'=>'⪖',
|
|
'eqslantless;'=>'⪕',
|
|
'equals;'=>'=',
|
|
'equest;'=>'≟',
|
|
'equiv;'=>'≡',
|
|
'equivDD;'=>'⩸',
|
|
'eqvparsl;'=>'⧥',
|
|
'erDot;'=>'≓',
|
|
'erarr;'=>'⥱',
|
|
'escr;'=>'ℯ',
|
|
'esdot;'=>'≐',
|
|
'esim;'=>'≂',
|
|
'eta;'=>'η',
|
|
'eth'=>'ð',
|
|
'eth;'=>'ð',
|
|
'euml'=>'ë',
|
|
'euml;'=>'ë',
|
|
'euro;'=>'€',
|
|
'excl;'=>'!',
|
|
'exist;'=>'∃',
|
|
'expectation;'=>'ℰ',
|
|
'exponentiale;'=>'ⅇ',
|
|
'fallingdotseq;'=>'≒',
|
|
'fcy;'=>'ф',
|
|
'female;'=>'♀',
|
|
'ffilig;'=>'ffi',
|
|
'fflig;'=>'ff',
|
|
'ffllig;'=>'ffl',
|
|
'ffr;'=>'픣',
|
|
'filig;'=>'fi',
|
|
'fjlig;'=>'fj',
|
|
'flat;'=>'♭',
|
|
'fllig;'=>'fl',
|
|
'fltns;'=>'▱',
|
|
'fnof;'=>'ƒ',
|
|
'fopf;'=>'핗',
|
|
'forall;'=>'∀',
|
|
'fork;'=>'⋔',
|
|
'forkv;'=>'⫙',
|
|
'fpartint;'=>'⨍',
|
|
'frac12'=>'½',
|
|
'frac12;'=>'½',
|
|
'frac13;'=>'⅓',
|
|
'frac14'=>'¼',
|
|
'frac14;'=>'¼',
|
|
'frac15;'=>'⅕',
|
|
'frac16;'=>'⅙',
|
|
'frac18;'=>'⅛',
|
|
'frac23;'=>'⅔',
|
|
'frac25;'=>'⅖',
|
|
'frac34'=>'¾',
|
|
'frac34;'=>'¾',
|
|
'frac35;'=>'⅗',
|
|
'frac38;'=>'⅜',
|
|
'frac45;'=>'⅘',
|
|
'frac56;'=>'⅚',
|
|
'frac58;'=>'⅝',
|
|
'frac78;'=>'⅞',
|
|
'frasl;'=>'⁄',
|
|
'frown;'=>'⌢',
|
|
'fscr;'=>'풻',
|
|
'gE;'=>'≧',
|
|
'gEl;'=>'⪌',
|
|
'gacute;'=>'ǵ',
|
|
'gamma;'=>'γ',
|
|
'gammad;'=>'ϝ',
|
|
'gap;'=>'⪆',
|
|
'gbreve;'=>'ğ',
|
|
'gcirc;'=>'ĝ',
|
|
'gcy;'=>'г',
|
|
'gdot;'=>'ġ',
|
|
'ge;'=>'≥',
|
|
'gel;'=>'⋛',
|
|
'geq;'=>'≥',
|
|
'geqq;'=>'≧',
|
|
'geqslant;'=>'⩾',
|
|
'ges;'=>'⩾',
|
|
'gescc;'=>'⪩',
|
|
'gesdot;'=>'⪀',
|
|
'gesdoto;'=>'⪂',
|
|
'gesdotol;'=>'⪄',
|
|
'gesl;'=>'⋛︀',
|
|
'gesles;'=>'⪔',
|
|
'gfr;'=>'픤',
|
|
'gg;'=>'≫',
|
|
'ggg;'=>'⋙',
|
|
'gimel;'=>'ℷ',
|
|
'gjcy;'=>'ѓ',
|
|
'gl;'=>'≷',
|
|
'glE;'=>'⪒',
|
|
'gla;'=>'⪥',
|
|
'glj;'=>'⪤',
|
|
'gnE;'=>'≩',
|
|
'gnap;'=>'⪊',
|
|
'gnapprox;'=>'⪊',
|
|
'gne;'=>'⪈',
|
|
'gneq;'=>'⪈',
|
|
'gneqq;'=>'≩',
|
|
'gnsim;'=>'⋧',
|
|
'gopf;'=>'하',
|
|
'grave;'=>'`',
|
|
'gscr;'=>'ℊ',
|
|
'gsim;'=>'≳',
|
|
'gsime;'=>'⪎',
|
|
'gsiml;'=>'⪐',
|
|
'gt'=>'>',
|
|
'gt;'=>'>',
|
|
'gtcc;'=>'⪧',
|
|
'gtcir;'=>'⩺',
|
|
'gtdot;'=>'⋗',
|
|
'gtlPar;'=>'⦕',
|
|
'gtquest;'=>'⩼',
|
|
'gtrapprox;'=>'⪆',
|
|
'gtrarr;'=>'⥸',
|
|
'gtrdot;'=>'⋗',
|
|
'gtreqless;'=>'⋛',
|
|
'gtreqqless;'=>'⪌',
|
|
'gtrless;'=>'≷',
|
|
'gtrsim;'=>'≳',
|
|
'gvertneqq;'=>'≩︀',
|
|
'gvnE;'=>'≩︀',
|
|
'hArr;'=>'⇔',
|
|
'hairsp;'=>' ',
|
|
'half;'=>'½',
|
|
'hamilt;'=>'ℋ',
|
|
'hardcy;'=>'ъ',
|
|
'harr;'=>'↔',
|
|
'harrcir;'=>'⥈',
|
|
'harrw;'=>'↭',
|
|
'hbar;'=>'ℏ',
|
|
'hcirc;'=>'ĥ',
|
|
'hearts;'=>'♥',
|
|
'heartsuit;'=>'♥',
|
|
'hellip;'=>'…',
|
|
'hercon;'=>'⊹',
|
|
'hfr;'=>'픥',
|
|
'hksearow;'=>'⤥',
|
|
'hkswarow;'=>'⤦',
|
|
'hoarr;'=>'⇿',
|
|
'homtht;'=>'∻',
|
|
'hookleftarrow;'=>'↩',
|
|
'hookrightarrow;'=>'↪',
|
|
'hopf;'=>'학',
|
|
'horbar;'=>'―',
|
|
'hscr;'=>'풽',
|
|
'hslash;'=>'ℏ',
|
|
'hstrok;'=>'ħ',
|
|
'hybull;'=>'⁃',
|
|
'hyphen;'=>'‐',
|
|
'iacute'=>'í',
|
|
'iacute;'=>'í',
|
|
'ic;'=>'',
|
|
'icirc'=>'î',
|
|
'icirc;'=>'î',
|
|
'icy;'=>'и',
|
|
'iecy;'=>'е',
|
|
'iexcl'=>'¡',
|
|
'iexcl;'=>'¡',
|
|
'iff;'=>'⇔',
|
|
'ifr;'=>'픦',
|
|
'igrave'=>'ì',
|
|
'igrave;'=>'ì',
|
|
'ii;'=>'ⅈ',
|
|
'iiiint;'=>'⨌',
|
|
'iiint;'=>'∭',
|
|
'iinfin;'=>'⧜',
|
|
'iiota;'=>'℩',
|
|
'ijlig;'=>'ij',
|
|
'imacr;'=>'ī',
|
|
'image;'=>'ℑ',
|
|
'imagline;'=>'ℐ',
|
|
'imagpart;'=>'ℑ',
|
|
'imath;'=>'ı',
|
|
'imof;'=>'⊷',
|
|
'imped;'=>'Ƶ',
|
|
'in;'=>'∈',
|
|
'incare;'=>'℅',
|
|
'infin;'=>'∞',
|
|
'infintie;'=>'⧝',
|
|
'inodot;'=>'ı',
|
|
'int;'=>'∫',
|
|
'intcal;'=>'⊺',
|
|
'integers;'=>'ℤ',
|
|
'intercal;'=>'⊺',
|
|
'intlarhk;'=>'⨗',
|
|
'intprod;'=>'⨼',
|
|
'iocy;'=>'ё',
|
|
'iogon;'=>'į',
|
|
'iopf;'=>'핚',
|
|
'iota;'=>'ι',
|
|
'iprod;'=>'⨼',
|
|
'iquest'=>'¿',
|
|
'iquest;'=>'¿',
|
|
'iscr;'=>'풾',
|
|
'isin;'=>'∈',
|
|
'isinE;'=>'⋹',
|
|
'isindot;'=>'⋵',
|
|
'isins;'=>'⋴',
|
|
'isinsv;'=>'⋳',
|
|
'isinv;'=>'∈',
|
|
'it;'=>'',
|
|
'itilde;'=>'ĩ',
|
|
'iukcy;'=>'і',
|
|
'iuml'=>'ï',
|
|
'iuml;'=>'ï',
|
|
'jcirc;'=>'ĵ',
|
|
'jcy;'=>'й',
|
|
'jfr;'=>'픧',
|
|
'jmath;'=>'ȷ',
|
|
'jopf;'=>'핛',
|
|
'jscr;'=>'풿',
|
|
'jsercy;'=>'ј',
|
|
'jukcy;'=>'є',
|
|
'kappa;'=>'κ',
|
|
'kappav;'=>'ϰ',
|
|
'kcedil;'=>'ķ',
|
|
'kcy;'=>'к',
|
|
'kfr;'=>'픨',
|
|
'kgreen;'=>'ĸ',
|
|
'khcy;'=>'х',
|
|
'kjcy;'=>'ќ',
|
|
'kopf;'=>'한',
|
|
'kscr;'=>'퓀',
|
|
'lAarr;'=>'⇚',
|
|
'lArr;'=>'⇐',
|
|
'lAtail;'=>'⤛',
|
|
'lBarr;'=>'⤎',
|
|
'lE;'=>'≦',
|
|
'lEg;'=>'⪋',
|
|
'lHar;'=>'⥢',
|
|
'lacute;'=>'ĺ',
|
|
'laemptyv;'=>'⦴',
|
|
'lagran;'=>'ℒ',
|
|
'lambda;'=>'λ',
|
|
'lang;'=>'⟨',
|
|
'langd;'=>'⦑',
|
|
'langle;'=>'⟨',
|
|
'lap;'=>'⪅',
|
|
'laquo'=>'«',
|
|
'laquo;'=>'«',
|
|
'larr;'=>'←',
|
|
'larrb;'=>'⇤',
|
|
'larrbfs;'=>'⤟',
|
|
'larrfs;'=>'⤝',
|
|
'larrhk;'=>'↩',
|
|
'larrlp;'=>'↫',
|
|
'larrpl;'=>'⤹',
|
|
'larrsim;'=>'⥳',
|
|
'larrtl;'=>'↢',
|
|
'lat;'=>'⪫',
|
|
'latail;'=>'⤙',
|
|
'late;'=>'⪭',
|
|
'lates;'=>'⪭︀',
|
|
'lbarr;'=>'⤌',
|
|
'lbbrk;'=>'❲',
|
|
'lbrace;'=>'{',
|
|
'lbrack;'=>'[',
|
|
'lbrke;'=>'⦋',
|
|
'lbrksld;'=>'⦏',
|
|
'lbrkslu;'=>'⦍',
|
|
'lcaron;'=>'ľ',
|
|
'lcedil;'=>'ļ',
|
|
'lceil;'=>'⌈',
|
|
'lcub;'=>'{',
|
|
'lcy;'=>'л',
|
|
'ldca;'=>'⤶',
|
|
'ldquo;'=>'“',
|
|
'ldquor;'=>'„',
|
|
'ldrdhar;'=>'⥧',
|
|
'ldrushar;'=>'⥋',
|
|
'ldsh;'=>'↲',
|
|
'le;'=>'≤',
|
|
'leftarrow;'=>'←',
|
|
'leftarrowtail;'=>'↢',
|
|
'leftharpoondown;'=>'↽',
|
|
'leftharpoonup;'=>'↼',
|
|
'leftleftarrows;'=>'⇇',
|
|
'leftrightarrow;'=>'↔',
|
|
'leftrightarrows;'=>'⇆',
|
|
'leftrightharpoons;'=>'⇋',
|
|
'leftrightsquigarrow;'=>'↭',
|
|
'leftthreetimes;'=>'⋋',
|
|
'leg;'=>'⋚',
|
|
'leq;'=>'≤',
|
|
'leqq;'=>'≦',
|
|
'leqslant;'=>'⩽',
|
|
'les;'=>'⩽',
|
|
'lescc;'=>'⪨',
|
|
'lesdot;'=>'⩿',
|
|
'lesdoto;'=>'⪁',
|
|
'lesdotor;'=>'⪃',
|
|
'lesg;'=>'⋚︀',
|
|
'lesges;'=>'⪓',
|
|
'lessapprox;'=>'⪅',
|
|
'lessdot;'=>'⋖',
|
|
'lesseqgtr;'=>'⋚',
|
|
'lesseqqgtr;'=>'⪋',
|
|
'lessgtr;'=>'≶',
|
|
'lesssim;'=>'≲',
|
|
'lfisht;'=>'⥼',
|
|
'lfloor;'=>'⌊',
|
|
'lfr;'=>'픩',
|
|
'lg;'=>'≶',
|
|
'lgE;'=>'⪑',
|
|
'lhard;'=>'↽',
|
|
'lharu;'=>'↼',
|
|
'lharul;'=>'⥪',
|
|
'lhblk;'=>'▄',
|
|
'ljcy;'=>'љ',
|
|
'll;'=>'≪',
|
|
'llarr;'=>'⇇',
|
|
'llcorner;'=>'⌞',
|
|
'llhard;'=>'⥫',
|
|
'lltri;'=>'◺',
|
|
'lmidot;'=>'ŀ',
|
|
'lmoust;'=>'⎰',
|
|
'lmoustache;'=>'⎰',
|
|
'lnE;'=>'≨',
|
|
'lnap;'=>'⪉',
|
|
'lnapprox;'=>'⪉',
|
|
'lne;'=>'⪇',
|
|
'lneq;'=>'⪇',
|
|
'lneqq;'=>'≨',
|
|
'lnsim;'=>'⋦',
|
|
'loang;'=>'⟬',
|
|
'loarr;'=>'⇽',
|
|
'lobrk;'=>'⟦',
|
|
'longleftarrow;'=>'⟵',
|
|
'longleftrightarrow;'=>'⟷',
|
|
'longmapsto;'=>'⟼',
|
|
'longrightarrow;'=>'⟶',
|
|
'looparrowleft;'=>'↫',
|
|
'looparrowright;'=>'↬',
|
|
'lopar;'=>'⦅',
|
|
'lopf;'=>'핝',
|
|
'loplus;'=>'⨭',
|
|
'lotimes;'=>'⨴',
|
|
'lowast;'=>'∗',
|
|
'lowbar;'=>'_',
|
|
'loz;'=>'◊',
|
|
'lozenge;'=>'◊',
|
|
'lozf;'=>'⧫',
|
|
'lpar;'=>'(',
|
|
'lparlt;'=>'⦓',
|
|
'lrarr;'=>'⇆',
|
|
'lrcorner;'=>'⌟',
|
|
'lrhar;'=>'⇋',
|
|
'lrhard;'=>'⥭',
|
|
'lrm;'=>'',
|
|
'lrtri;'=>'⊿',
|
|
'lsaquo;'=>'‹',
|
|
'lscr;'=>'퓁',
|
|
'lsh;'=>'↰',
|
|
'lsim;'=>'≲',
|
|
'lsime;'=>'⪍',
|
|
'lsimg;'=>'⪏',
|
|
'lsqb;'=>'[',
|
|
'lsquo;'=>'‘',
|
|
'lsquor;'=>'‚',
|
|
'lstrok;'=>'ł',
|
|
'lt'=>'<',
|
|
'lt;'=>'<',
|
|
'ltcc;'=>'⪦',
|
|
'ltcir;'=>'⩹',
|
|
'ltdot;'=>'⋖',
|
|
'lthree;'=>'⋋',
|
|
'ltimes;'=>'⋉',
|
|
'ltlarr;'=>'⥶',
|
|
'ltquest;'=>'⩻',
|
|
'ltrPar;'=>'⦖',
|
|
'ltri;'=>'◃',
|
|
'ltrie;'=>'⊴',
|
|
'ltrif;'=>'◂',
|
|
'lurdshar;'=>'⥊',
|
|
'luruhar;'=>'⥦',
|
|
'lvertneqq;'=>'≨︀',
|
|
'lvnE;'=>'≨︀',
|
|
'mDDot;'=>'∺',
|
|
'macr'=>'¯',
|
|
'macr;'=>'¯',
|
|
'male;'=>'♂',
|
|
'malt;'=>'✠',
|
|
'maltese;'=>'✠',
|
|
'map;'=>'↦',
|
|
'mapsto;'=>'↦',
|
|
'mapstodown;'=>'↧',
|
|
'mapstoleft;'=>'↤',
|
|
'mapstoup;'=>'↥',
|
|
'marker;'=>'▮',
|
|
'mcomma;'=>'⨩',
|
|
'mcy;'=>'м',
|
|
'mdash;'=>'—',
|
|
'measuredangle;'=>'∡',
|
|
'mfr;'=>'픪',
|
|
'mho;'=>'℧',
|
|
'micro'=>'µ',
|
|
'micro;'=>'µ',
|
|
'mid;'=>'∣',
|
|
'midast;'=>'*',
|
|
'midcir;'=>'⫰',
|
|
'middot'=>'·',
|
|
'middot;'=>'·',
|
|
'minus;'=>'−',
|
|
'minusb;'=>'⊟',
|
|
'minusd;'=>'∸',
|
|
'minusdu;'=>'⨪',
|
|
'mlcp;'=>'⫛',
|
|
'mldr;'=>'…',
|
|
'mnplus;'=>'∓',
|
|
'models;'=>'⊧',
|
|
'mopf;'=>'핞',
|
|
'mp;'=>'∓',
|
|
'mscr;'=>'퓂',
|
|
'mstpos;'=>'∾',
|
|
'mu;'=>'μ',
|
|
'multimap;'=>'⊸',
|
|
'mumap;'=>'⊸',
|
|
'nGg;'=>'⋙̸',
|
|
'nGt;'=>'≫⃒',
|
|
'nGtv;'=>'≫̸',
|
|
'nLeftarrow;'=>'⇍',
|
|
'nLeftrightarrow;'=>'⇎',
|
|
'nLl;'=>'⋘̸',
|
|
'nLt;'=>'≪⃒',
|
|
'nLtv;'=>'≪̸',
|
|
'nRightarrow;'=>'⇏',
|
|
'nVDash;'=>'⊯',
|
|
'nVdash;'=>'⊮',
|
|
'nabla;'=>'∇',
|
|
'nacute;'=>'ń',
|
|
'nang;'=>'∠⃒',
|
|
'nap;'=>'≉',
|
|
'napE;'=>'⩰̸',
|
|
'napid;'=>'≋̸',
|
|
'napos;'=>'ʼn',
|
|
'napprox;'=>'≉',
|
|
'natur;'=>'♮',
|
|
'natural;'=>'♮',
|
|
'naturals;'=>'ℕ',
|
|
'nbsp'=>' ',
|
|
'nbsp;'=>' ',
|
|
'nbump;'=>'≎̸',
|
|
'nbumpe;'=>'≏̸',
|
|
'ncap;'=>'⩃',
|
|
'ncaron;'=>'ň',
|
|
'ncedil;'=>'ņ',
|
|
'ncong;'=>'≇',
|
|
'ncongdot;'=>'⩭̸',
|
|
'ncup;'=>'⩂',
|
|
'ncy;'=>'н',
|
|
'ndash;'=>'–',
|
|
'ne;'=>'≠',
|
|
'neArr;'=>'⇗',
|
|
'nearhk;'=>'⤤',
|
|
'nearr;'=>'↗',
|
|
'nearrow;'=>'↗',
|
|
'nedot;'=>'≐̸',
|
|
'nequiv;'=>'≢',
|
|
'nesear;'=>'⤨',
|
|
'nesim;'=>'≂̸',
|
|
'nexist;'=>'∄',
|
|
'nexists;'=>'∄',
|
|
'nfr;'=>'픫',
|
|
'ngE;'=>'≧̸',
|
|
'nge;'=>'≱',
|
|
'ngeq;'=>'≱',
|
|
'ngeqq;'=>'≧̸',
|
|
'ngeqslant;'=>'⩾̸',
|
|
'nges;'=>'⩾̸',
|
|
'ngsim;'=>'≵',
|
|
'ngt;'=>'≯',
|
|
'ngtr;'=>'≯',
|
|
'nhArr;'=>'⇎',
|
|
'nharr;'=>'↮',
|
|
'nhpar;'=>'⫲',
|
|
'ni;'=>'∋',
|
|
'nis;'=>'⋼',
|
|
'nisd;'=>'⋺',
|
|
'niv;'=>'∋',
|
|
'njcy;'=>'њ',
|
|
'nlArr;'=>'⇍',
|
|
'nlE;'=>'≦̸',
|
|
'nlarr;'=>'↚',
|
|
'nldr;'=>'‥',
|
|
'nle;'=>'≰',
|
|
'nleftarrow;'=>'↚',
|
|
'nleftrightarrow;'=>'↮',
|
|
'nleq;'=>'≰',
|
|
'nleqq;'=>'≦̸',
|
|
'nleqslant;'=>'⩽̸',
|
|
'nles;'=>'⩽̸',
|
|
'nless;'=>'≮',
|
|
'nlsim;'=>'≴',
|
|
'nlt;'=>'≮',
|
|
'nltri;'=>'⋪',
|
|
'nltrie;'=>'⋬',
|
|
'nmid;'=>'∤',
|
|
'nopf;'=>'핟',
|
|
'not'=>'¬',
|
|
'not;'=>'¬',
|
|
'notin;'=>'∉',
|
|
'notinE;'=>'⋹̸',
|
|
'notindot;'=>'⋵̸',
|
|
'notinva;'=>'∉',
|
|
'notinvb;'=>'⋷',
|
|
'notinvc;'=>'⋶',
|
|
'notni;'=>'∌',
|
|
'notniva;'=>'∌',
|
|
'notnivb;'=>'⋾',
|
|
'notnivc;'=>'⋽',
|
|
'npar;'=>'∦',
|
|
'nparallel;'=>'∦',
|
|
'nparsl;'=>'⫽⃥',
|
|
'npart;'=>'∂̸',
|
|
'npolint;'=>'⨔',
|
|
'npr;'=>'⊀',
|
|
'nprcue;'=>'⋠',
|
|
'npre;'=>'⪯̸',
|
|
'nprec;'=>'⊀',
|
|
'npreceq;'=>'⪯̸',
|
|
'nrArr;'=>'⇏',
|
|
'nrarr;'=>'↛',
|
|
'nrarrc;'=>'⤳̸',
|
|
'nrarrw;'=>'↝̸',
|
|
'nrightarrow;'=>'↛',
|
|
'nrtri;'=>'⋫',
|
|
'nrtrie;'=>'⋭',
|
|
'nsc;'=>'⊁',
|
|
'nsccue;'=>'⋡',
|
|
'nsce;'=>'⪰̸',
|
|
'nscr;'=>'퓃',
|
|
'nshortmid;'=>'∤',
|
|
'nshortparallel;'=>'∦',
|
|
'nsim;'=>'≁',
|
|
'nsime;'=>'≄',
|
|
'nsimeq;'=>'≄',
|
|
'nsmid;'=>'∤',
|
|
'nspar;'=>'∦',
|
|
'nsqsube;'=>'⋢',
|
|
'nsqsupe;'=>'⋣',
|
|
'nsub;'=>'⊄',
|
|
'nsubE;'=>'⫅̸',
|
|
'nsube;'=>'⊈',
|
|
'nsubset;'=>'⊂⃒',
|
|
'nsubseteq;'=>'⊈',
|
|
'nsubseteqq;'=>'⫅̸',
|
|
'nsucc;'=>'⊁',
|
|
'nsucceq;'=>'⪰̸',
|
|
'nsup;'=>'⊅',
|
|
'nsupE;'=>'⫆̸',
|
|
'nsupe;'=>'⊉',
|
|
'nsupset;'=>'⊃⃒',
|
|
'nsupseteq;'=>'⊉',
|
|
'nsupseteqq;'=>'⫆̸',
|
|
'ntgl;'=>'≹',
|
|
'ntilde'=>'ñ',
|
|
'ntilde;'=>'ñ',
|
|
'ntlg;'=>'≸',
|
|
'ntriangleleft;'=>'⋪',
|
|
'ntrianglelefteq;'=>'⋬',
|
|
'ntriangleright;'=>'⋫',
|
|
'ntrianglerighteq;'=>'⋭',
|
|
'nu;'=>'ν',
|
|
'num;'=>'#',
|
|
'numero;'=>'№',
|
|
'numsp;'=>' ',
|
|
'nvDash;'=>'⊭',
|
|
'nvHarr;'=>'⤄',
|
|
'nvap;'=>'≍⃒',
|
|
'nvdash;'=>'⊬',
|
|
'nvge;'=>'≥⃒',
|
|
'nvgt;'=>'>⃒',
|
|
'nvinfin;'=>'⧞',
|
|
'nvlArr;'=>'⤂',
|
|
'nvle;'=>'≤⃒',
|
|
'nvlt;'=>'<⃒',
|
|
'nvltrie;'=>'⊴⃒',
|
|
'nvrArr;'=>'⤃',
|
|
'nvrtrie;'=>'⊵⃒',
|
|
'nvsim;'=>'∼⃒',
|
|
'nwArr;'=>'⇖',
|
|
'nwarhk;'=>'⤣',
|
|
'nwarr;'=>'↖',
|
|
'nwarrow;'=>'↖',
|
|
'nwnear;'=>'⤧',
|
|
'oS;'=>'Ⓢ',
|
|
'oacute'=>'ó',
|
|
'oacute;'=>'ó',
|
|
'oast;'=>'⊛',
|
|
'ocir;'=>'⊚',
|
|
'ocirc'=>'ô',
|
|
'ocirc;'=>'ô',
|
|
'ocy;'=>'о',
|
|
'odash;'=>'⊝',
|
|
'odblac;'=>'ő',
|
|
'odiv;'=>'⨸',
|
|
'odot;'=>'⊙',
|
|
'odsold;'=>'⦼',
|
|
'oelig;'=>'œ',
|
|
'ofcir;'=>'⦿',
|
|
'ofr;'=>'픬',
|
|
'ogon;'=>'˛',
|
|
'ograve'=>'ò',
|
|
'ograve;'=>'ò',
|
|
'ogt;'=>'⧁',
|
|
'ohbar;'=>'⦵',
|
|
'ohm;'=>'Ω',
|
|
'oint;'=>'∮',
|
|
'olarr;'=>'↺',
|
|
'olcir;'=>'⦾',
|
|
'olcross;'=>'⦻',
|
|
'oline;'=>'‾',
|
|
'olt;'=>'⧀',
|
|
'omacr;'=>'ō',
|
|
'omega;'=>'ω',
|
|
'omicron;'=>'ο',
|
|
'omid;'=>'⦶',
|
|
'ominus;'=>'⊖',
|
|
'oopf;'=>'할',
|
|
'opar;'=>'⦷',
|
|
'operp;'=>'⦹',
|
|
'oplus;'=>'⊕',
|
|
'or;'=>'∨',
|
|
'orarr;'=>'↻',
|
|
'ord;'=>'⩝',
|
|
'order;'=>'ℴ',
|
|
'orderof;'=>'ℴ',
|
|
'ordf'=>'ª',
|
|
'ordf;'=>'ª',
|
|
'ordm'=>'º',
|
|
'ordm;'=>'º',
|
|
'origof;'=>'⊶',
|
|
'oror;'=>'⩖',
|
|
'orslope;'=>'⩗',
|
|
'orv;'=>'⩛',
|
|
'oscr;'=>'ℴ',
|
|
'oslash'=>'ø',
|
|
'oslash;'=>'ø',
|
|
'osol;'=>'⊘',
|
|
'otilde'=>'õ',
|
|
'otilde;'=>'õ',
|
|
'otimes;'=>'⊗',
|
|
'otimesas;'=>'⨶',
|
|
'ouml'=>'ö',
|
|
'ouml;'=>'ö',
|
|
'ovbar;'=>'⌽',
|
|
'par;'=>'∥',
|
|
'para'=>'¶',
|
|
'para;'=>'¶',
|
|
'parallel;'=>'∥',
|
|
'parsim;'=>'⫳',
|
|
'parsl;'=>'⫽',
|
|
'part;'=>'∂',
|
|
'pcy;'=>'п',
|
|
'percnt;'=>'%',
|
|
'period;'=>'.',
|
|
'permil;'=>'‰',
|
|
'perp;'=>'⊥',
|
|
'pertenk;'=>'‱',
|
|
'pfr;'=>'픭',
|
|
'phi;'=>'φ',
|
|
'phiv;'=>'ϕ',
|
|
'phmmat;'=>'ℳ',
|
|
'phone;'=>'☎',
|
|
'pi;'=>'π',
|
|
'pitchfork;'=>'⋔',
|
|
'piv;'=>'ϖ',
|
|
'planck;'=>'ℏ',
|
|
'planckh;'=>'ℎ',
|
|
'plankv;'=>'ℏ',
|
|
'plus;'=>'+',
|
|
'plusacir;'=>'⨣',
|
|
'plusb;'=>'⊞',
|
|
'pluscir;'=>'⨢',
|
|
'plusdo;'=>'∔',
|
|
'plusdu;'=>'⨥',
|
|
'pluse;'=>'⩲',
|
|
'plusmn'=>'±',
|
|
'plusmn;'=>'±',
|
|
'plussim;'=>'⨦',
|
|
'plustwo;'=>'⨧',
|
|
'pm;'=>'±',
|
|
'pointint;'=>'⨕',
|
|
'popf;'=>'핡',
|
|
'pound'=>'£',
|
|
'pound;'=>'£',
|
|
'pr;'=>'≺',
|
|
'prE;'=>'⪳',
|
|
'prap;'=>'⪷',
|
|
'prcue;'=>'≼',
|
|
'pre;'=>'⪯',
|
|
'prec;'=>'≺',
|
|
'precapprox;'=>'⪷',
|
|
'preccurlyeq;'=>'≼',
|
|
'preceq;'=>'⪯',
|
|
'precnapprox;'=>'⪹',
|
|
'precneqq;'=>'⪵',
|
|
'precnsim;'=>'⋨',
|
|
'precsim;'=>'≾',
|
|
'prime;'=>'′',
|
|
'primes;'=>'ℙ',
|
|
'prnE;'=>'⪵',
|
|
'prnap;'=>'⪹',
|
|
'prnsim;'=>'⋨',
|
|
'prod;'=>'∏',
|
|
'profalar;'=>'⌮',
|
|
'profline;'=>'⌒',
|
|
'profsurf;'=>'⌓',
|
|
'prop;'=>'∝',
|
|
'propto;'=>'∝',
|
|
'prsim;'=>'≾',
|
|
'prurel;'=>'⊰',
|
|
'pscr;'=>'퓅',
|
|
'psi;'=>'ψ',
|
|
'puncsp;'=>' ',
|
|
'qfr;'=>'픮',
|
|
'qint;'=>'⨌',
|
|
'qopf;'=>'핢',
|
|
'qprime;'=>'⁗',
|
|
'qscr;'=>'퓆',
|
|
'quaternions;'=>'ℍ',
|
|
'quatint;'=>'⨖',
|
|
'quest;'=>'?',
|
|
'questeq;'=>'≟',
|
|
'quot'=>'"',
|
|
'quot;'=>'"',
|
|
'rAarr;'=>'⇛',
|
|
'rArr;'=>'⇒',
|
|
'rAtail;'=>'⤜',
|
|
'rBarr;'=>'⤏',
|
|
'rHar;'=>'⥤',
|
|
'race;'=>'∽̱',
|
|
'racute;'=>'ŕ',
|
|
'radic;'=>'√',
|
|
'raemptyv;'=>'⦳',
|
|
'rang;'=>'⟩',
|
|
'rangd;'=>'⦒',
|
|
'range;'=>'⦥',
|
|
'rangle;'=>'⟩',
|
|
'raquo'=>'»',
|
|
'raquo;'=>'»',
|
|
'rarr;'=>'→',
|
|
'rarrap;'=>'⥵',
|
|
'rarrb;'=>'⇥',
|
|
'rarrbfs;'=>'⤠',
|
|
'rarrc;'=>'⤳',
|
|
'rarrfs;'=>'⤞',
|
|
'rarrhk;'=>'↪',
|
|
'rarrlp;'=>'↬',
|
|
'rarrpl;'=>'⥅',
|
|
'rarrsim;'=>'⥴',
|
|
'rarrtl;'=>'↣',
|
|
'rarrw;'=>'↝',
|
|
'ratail;'=>'⤚',
|
|
'ratio;'=>'∶',
|
|
'rationals;'=>'ℚ',
|
|
'rbarr;'=>'⤍',
|
|
'rbbrk;'=>'❳',
|
|
'rbrace;'=>'}',
|
|
'rbrack;'=>']',
|
|
'rbrke;'=>'⦌',
|
|
'rbrksld;'=>'⦎',
|
|
'rbrkslu;'=>'⦐',
|
|
'rcaron;'=>'ř',
|
|
'rcedil;'=>'ŗ',
|
|
'rceil;'=>'⌉',
|
|
'rcub;'=>'}',
|
|
'rcy;'=>'р',
|
|
'rdca;'=>'⤷',
|
|
'rdldhar;'=>'⥩',
|
|
'rdquo;'=>'”',
|
|
'rdquor;'=>'”',
|
|
'rdsh;'=>'↳',
|
|
'real;'=>'ℜ',
|
|
'realine;'=>'ℛ',
|
|
'realpart;'=>'ℜ',
|
|
'reals;'=>'ℝ',
|
|
'rect;'=>'▭',
|
|
'reg'=>'®',
|
|
'reg;'=>'®',
|
|
'rfisht;'=>'⥽',
|
|
'rfloor;'=>'⌋',
|
|
'rfr;'=>'픯',
|
|
'rhard;'=>'⇁',
|
|
'rharu;'=>'⇀',
|
|
'rharul;'=>'⥬',
|
|
'rho;'=>'ρ',
|
|
'rhov;'=>'ϱ',
|
|
'rightarrow;'=>'→',
|
|
'rightarrowtail;'=>'↣',
|
|
'rightharpoondown;'=>'⇁',
|
|
'rightharpoonup;'=>'⇀',
|
|
'rightleftarrows;'=>'⇄',
|
|
'rightleftharpoons;'=>'⇌',
|
|
'rightrightarrows;'=>'⇉',
|
|
'rightsquigarrow;'=>'↝',
|
|
'rightthreetimes;'=>'⋌',
|
|
'ring;'=>'˚',
|
|
'risingdotseq;'=>'≓',
|
|
'rlarr;'=>'⇄',
|
|
'rlhar;'=>'⇌',
|
|
'rlm;'=>'',
|
|
'rmoust;'=>'⎱',
|
|
'rmoustache;'=>'⎱',
|
|
'rnmid;'=>'⫮',
|
|
'roang;'=>'⟭',
|
|
'roarr;'=>'⇾',
|
|
'robrk;'=>'⟧',
|
|
'ropar;'=>'⦆',
|
|
'ropf;'=>'핣',
|
|
'roplus;'=>'⨮',
|
|
'rotimes;'=>'⨵',
|
|
'rpar;'=>')',
|
|
'rpargt;'=>'⦔',
|
|
'rppolint;'=>'⨒',
|
|
'rrarr;'=>'⇉',
|
|
'rsaquo;'=>'›',
|
|
'rscr;'=>'퓇',
|
|
'rsh;'=>'↱',
|
|
'rsqb;'=>']',
|
|
'rsquo;'=>'’',
|
|
'rsquor;'=>'’',
|
|
'rthree;'=>'⋌',
|
|
'rtimes;'=>'⋊',
|
|
'rtri;'=>'▹',
|
|
'rtrie;'=>'⊵',
|
|
'rtrif;'=>'▸',
|
|
'rtriltri;'=>'⧎',
|
|
'ruluhar;'=>'⥨',
|
|
'rx;'=>'℞',
|
|
'sacute;'=>'ś',
|
|
'sbquo;'=>'‚',
|
|
'sc;'=>'≻',
|
|
'scE;'=>'⪴',
|
|
'scap;'=>'⪸',
|
|
'scaron;'=>'š',
|
|
'sccue;'=>'≽',
|
|
'sce;'=>'⪰',
|
|
'scedil;'=>'ş',
|
|
'scirc;'=>'ŝ',
|
|
'scnE;'=>'⪶',
|
|
'scnap;'=>'⪺',
|
|
'scnsim;'=>'⋩',
|
|
'scpolint;'=>'⨓',
|
|
'scsim;'=>'≿',
|
|
'scy;'=>'с',
|
|
'sdot;'=>'⋅',
|
|
'sdotb;'=>'⊡',
|
|
'sdote;'=>'⩦',
|
|
'seArr;'=>'⇘',
|
|
'searhk;'=>'⤥',
|
|
'searr;'=>'↘',
|
|
'searrow;'=>'↘',
|
|
'sect'=>'§',
|
|
'sect;'=>'§',
|
|
'semi;'=>';',
|
|
'seswar;'=>'⤩',
|
|
'setminus;'=>'∖',
|
|
'setmn;'=>'∖',
|
|
'sext;'=>'✶',
|
|
'sfr;'=>'픰',
|
|
'sfrown;'=>'⌢',
|
|
'sharp;'=>'♯',
|
|
'shchcy;'=>'щ',
|
|
'shcy;'=>'ш',
|
|
'shortmid;'=>'∣',
|
|
'shortparallel;'=>'∥',
|
|
'shy'=>'',
|
|
'shy;'=>'',
|
|
'sigma;'=>'σ',
|
|
'sigmaf;'=>'ς',
|
|
'sigmav;'=>'ς',
|
|
'sim;'=>'∼',
|
|
'simdot;'=>'⩪',
|
|
'sime;'=>'≃',
|
|
'simeq;'=>'≃',
|
|
'simg;'=>'⪞',
|
|
'simgE;'=>'⪠',
|
|
'siml;'=>'⪝',
|
|
'simlE;'=>'⪟',
|
|
'simne;'=>'≆',
|
|
'simplus;'=>'⨤',
|
|
'simrarr;'=>'⥲',
|
|
'slarr;'=>'←',
|
|
'smallsetminus;'=>'∖',
|
|
'smashp;'=>'⨳',
|
|
'smeparsl;'=>'⧤',
|
|
'smid;'=>'∣',
|
|
'smile;'=>'⌣',
|
|
'smt;'=>'⪪',
|
|
'smte;'=>'⪬',
|
|
'smtes;'=>'⪬︀',
|
|
'softcy;'=>'ь',
|
|
'sol;'=>'/',
|
|
'solb;'=>'⧄',
|
|
'solbar;'=>'⌿',
|
|
'sopf;'=>'핤',
|
|
'spades;'=>'♠',
|
|
'spadesuit;'=>'♠',
|
|
'spar;'=>'∥',
|
|
'sqcap;'=>'⊓',
|
|
'sqcaps;'=>'⊓︀',
|
|
'sqcup;'=>'⊔',
|
|
'sqcups;'=>'⊔︀',
|
|
'sqsub;'=>'⊏',
|
|
'sqsube;'=>'⊑',
|
|
'sqsubset;'=>'⊏',
|
|
'sqsubseteq;'=>'⊑',
|
|
'sqsup;'=>'⊐',
|
|
'sqsupe;'=>'⊒',
|
|
'sqsupset;'=>'⊐',
|
|
'sqsupseteq;'=>'⊒',
|
|
'squ;'=>'□',
|
|
'square;'=>'□',
|
|
'squarf;'=>'▪',
|
|
'squf;'=>'▪',
|
|
'srarr;'=>'→',
|
|
'sscr;'=>'퓈',
|
|
'ssetmn;'=>'∖',
|
|
'ssmile;'=>'⌣',
|
|
'sstarf;'=>'⋆',
|
|
'star;'=>'☆',
|
|
'starf;'=>'★',
|
|
'straightepsilon;'=>'ϵ',
|
|
'straightphi;'=>'ϕ',
|
|
'strns;'=>'¯',
|
|
'sub;'=>'⊂',
|
|
'subE;'=>'⫅',
|
|
'subdot;'=>'⪽',
|
|
'sube;'=>'⊆',
|
|
'subedot;'=>'⫃',
|
|
'submult;'=>'⫁',
|
|
'subnE;'=>'⫋',
|
|
'subne;'=>'⊊',
|
|
'subplus;'=>'⪿',
|
|
'subrarr;'=>'⥹',
|
|
'subset;'=>'⊂',
|
|
'subseteq;'=>'⊆',
|
|
'subseteqq;'=>'⫅',
|
|
'subsetneq;'=>'⊊',
|
|
'subsetneqq;'=>'⫋',
|
|
'subsim;'=>'⫇',
|
|
'subsub;'=>'⫕',
|
|
'subsup;'=>'⫓',
|
|
'succ;'=>'≻',
|
|
'succapprox;'=>'⪸',
|
|
'succcurlyeq;'=>'≽',
|
|
'succeq;'=>'⪰',
|
|
'succnapprox;'=>'⪺',
|
|
'succneqq;'=>'⪶',
|
|
'succnsim;'=>'⋩',
|
|
'succsim;'=>'≿',
|
|
'sum;'=>'∑',
|
|
'sung;'=>'♪',
|
|
'sup1'=>'¹',
|
|
'sup1;'=>'¹',
|
|
'sup2'=>'²',
|
|
'sup2;'=>'²',
|
|
'sup3'=>'³',
|
|
'sup3;'=>'³',
|
|
'sup;'=>'⊃',
|
|
'supE;'=>'⫆',
|
|
'supdot;'=>'⪾',
|
|
'supdsub;'=>'⫘',
|
|
'supe;'=>'⊇',
|
|
'supedot;'=>'⫄',
|
|
'suphsol;'=>'⟉',
|
|
'suphsub;'=>'⫗',
|
|
'suplarr;'=>'⥻',
|
|
'supmult;'=>'⫂',
|
|
'supnE;'=>'⫌',
|
|
'supne;'=>'⊋',
|
|
'supplus;'=>'⫀',
|
|
'supset;'=>'⊃',
|
|
'supseteq;'=>'⊇',
|
|
'supseteqq;'=>'⫆',
|
|
'supsetneq;'=>'⊋',
|
|
'supsetneqq;'=>'⫌',
|
|
'supsim;'=>'⫈',
|
|
'supsub;'=>'⫔',
|
|
'supsup;'=>'⫖',
|
|
'swArr;'=>'⇙',
|
|
'swarhk;'=>'⤦',
|
|
'swarr;'=>'↙',
|
|
'swarrow;'=>'↙',
|
|
'swnwar;'=>'⤪',
|
|
'szlig'=>'ß',
|
|
'szlig;'=>'ß',
|
|
'target;'=>'⌖',
|
|
'tau;'=>'τ',
|
|
'tbrk;'=>'⎴',
|
|
'tcaron;'=>'ť',
|
|
'tcedil;'=>'ţ',
|
|
'tcy;'=>'т',
|
|
'tdot;'=>'⃛',
|
|
'telrec;'=>'⌕',
|
|
'tfr;'=>'픱',
|
|
'there4;'=>'∴',
|
|
'therefore;'=>'∴',
|
|
'theta;'=>'θ',
|
|
'thetasym;'=>'ϑ',
|
|
'thetav;'=>'ϑ',
|
|
'thickapprox;'=>'≈',
|
|
'thicksim;'=>'∼',
|
|
'thinsp;'=>' ',
|
|
'thkap;'=>'≈',
|
|
'thksim;'=>'∼',
|
|
'thorn'=>'þ',
|
|
'thorn;'=>'þ',
|
|
'tilde;'=>'˜',
|
|
'times'=>'×',
|
|
'times;'=>'×',
|
|
'timesb;'=>'⊠',
|
|
'timesbar;'=>'⨱',
|
|
'timesd;'=>'⨰',
|
|
'tint;'=>'∭',
|
|
'toea;'=>'⤨',
|
|
'top;'=>'⊤',
|
|
'topbot;'=>'⌶',
|
|
'topcir;'=>'⫱',
|
|
'topf;'=>'핥',
|
|
'topfork;'=>'⫚',
|
|
'tosa;'=>'⤩',
|
|
'tprime;'=>'‴',
|
|
'trade;'=>'™',
|
|
'triangle;'=>'▵',
|
|
'triangledown;'=>'▿',
|
|
'triangleleft;'=>'◃',
|
|
'trianglelefteq;'=>'⊴',
|
|
'triangleq;'=>'≜',
|
|
'triangleright;'=>'▹',
|
|
'trianglerighteq;'=>'⊵',
|
|
'tridot;'=>'◬',
|
|
'trie;'=>'≜',
|
|
'triminus;'=>'⨺',
|
|
'triplus;'=>'⨹',
|
|
'trisb;'=>'⧍',
|
|
'tritime;'=>'⨻',
|
|
'trpezium;'=>'⏢',
|
|
'tscr;'=>'퓉',
|
|
'tscy;'=>'ц',
|
|
'tshcy;'=>'ћ',
|
|
'tstrok;'=>'ŧ',
|
|
'twixt;'=>'≬',
|
|
'twoheadleftarrow;'=>'↞',
|
|
'twoheadrightarrow;'=>'↠',
|
|
'uArr;'=>'⇑',
|
|
'uHar;'=>'⥣',
|
|
'uacute'=>'ú',
|
|
'uacute;'=>'ú',
|
|
'uarr;'=>'↑',
|
|
'ubrcy;'=>'ў',
|
|
'ubreve;'=>'ŭ',
|
|
'ucirc'=>'û',
|
|
'ucirc;'=>'û',
|
|
'ucy;'=>'у',
|
|
'udarr;'=>'⇅',
|
|
'udblac;'=>'ű',
|
|
'udhar;'=>'⥮',
|
|
'ufisht;'=>'⥾',
|
|
'ufr;'=>'픲',
|
|
'ugrave'=>'ù',
|
|
'ugrave;'=>'ù',
|
|
'uharl;'=>'↿',
|
|
'uharr;'=>'↾',
|
|
'uhblk;'=>'▀',
|
|
'ulcorn;'=>'⌜',
|
|
'ulcorner;'=>'⌜',
|
|
'ulcrop;'=>'⌏',
|
|
'ultri;'=>'◸',
|
|
'umacr;'=>'ū',
|
|
'uml'=>'¨',
|
|
'uml;'=>'¨',
|
|
'uogon;'=>'ų',
|
|
'uopf;'=>'핦',
|
|
'uparrow;'=>'↑',
|
|
'updownarrow;'=>'↕',
|
|
'upharpoonleft;'=>'↿',
|
|
'upharpoonright;'=>'↾',
|
|
'uplus;'=>'⊎',
|
|
'upsi;'=>'υ',
|
|
'upsih;'=>'ϒ',
|
|
'upsilon;'=>'υ',
|
|
'upuparrows;'=>'⇈',
|
|
'urcorn;'=>'⌝',
|
|
'urcorner;'=>'⌝',
|
|
'urcrop;'=>'⌎',
|
|
'uring;'=>'ů',
|
|
'urtri;'=>'◹',
|
|
'uscr;'=>'퓊',
|
|
'utdot;'=>'⋰',
|
|
'utilde;'=>'ũ',
|
|
'utri;'=>'▵',
|
|
'utrif;'=>'▴',
|
|
'uuarr;'=>'⇈',
|
|
'uuml'=>'ü',
|
|
'uuml;'=>'ü',
|
|
'uwangle;'=>'⦧',
|
|
'vArr;'=>'⇕',
|
|
'vBar;'=>'⫨',
|
|
'vBarv;'=>'⫩',
|
|
'vDash;'=>'⊨',
|
|
'vangrt;'=>'⦜',
|
|
'varepsilon;'=>'ϵ',
|
|
'varkappa;'=>'ϰ',
|
|
'varnothing;'=>'∅',
|
|
'varphi;'=>'ϕ',
|
|
'varpi;'=>'ϖ',
|
|
'varpropto;'=>'∝',
|
|
'varr;'=>'↕',
|
|
'varrho;'=>'ϱ',
|
|
'varsigma;'=>'ς',
|
|
'varsubsetneq;'=>'⊊︀',
|
|
'varsubsetneqq;'=>'⫋︀',
|
|
'varsupsetneq;'=>'⊋︀',
|
|
'varsupsetneqq;'=>'⫌︀',
|
|
'vartheta;'=>'ϑ',
|
|
'vartriangleleft;'=>'⊲',
|
|
'vartriangleright;'=>'⊳',
|
|
'vcy;'=>'в',
|
|
'vdash;'=>'⊢',
|
|
'vee;'=>'∨',
|
|
'veebar;'=>'⊻',
|
|
'veeeq;'=>'≚',
|
|
'vellip;'=>'⋮',
|
|
'verbar;'=>'|',
|
|
'vert;'=>'|',
|
|
'vfr;'=>'픳',
|
|
'vltri;'=>'⊲',
|
|
'vnsub;'=>'⊂⃒',
|
|
'vnsup;'=>'⊃⃒',
|
|
'vopf;'=>'핧',
|
|
'vprop;'=>'∝',
|
|
'vrtri;'=>'⊳',
|
|
'vscr;'=>'퓋',
|
|
'vsubnE;'=>'⫋︀',
|
|
'vsubne;'=>'⊊︀',
|
|
'vsupnE;'=>'⫌︀',
|
|
'vsupne;'=>'⊋︀',
|
|
'vzigzag;'=>'⦚',
|
|
'wcirc;'=>'ŵ',
|
|
'wedbar;'=>'⩟',
|
|
'wedge;'=>'∧',
|
|
'wedgeq;'=>'≙',
|
|
'weierp;'=>'℘',
|
|
'wfr;'=>'픴',
|
|
'wopf;'=>'함',
|
|
'wp;'=>'℘',
|
|
'wr;'=>'≀',
|
|
'wreath;'=>'≀',
|
|
'wscr;'=>'퓌',
|
|
'xcap;'=>'⋂',
|
|
'xcirc;'=>'◯',
|
|
'xcup;'=>'⋃',
|
|
'xdtri;'=>'▽',
|
|
'xfr;'=>'픵',
|
|
'xhArr;'=>'⟺',
|
|
'xharr;'=>'⟷',
|
|
'xi;'=>'ξ',
|
|
'xlArr;'=>'⟸',
|
|
'xlarr;'=>'⟵',
|
|
'xmap;'=>'⟼',
|
|
'xnis;'=>'⋻',
|
|
'xodot;'=>'⨀',
|
|
'xopf;'=>'합',
|
|
'xoplus;'=>'⨁',
|
|
'xotime;'=>'⨂',
|
|
'xrArr;'=>'⟹',
|
|
'xrarr;'=>'⟶',
|
|
'xscr;'=>'퓍',
|
|
'xsqcup;'=>'⨆',
|
|
'xuplus;'=>'⨄',
|
|
'xutri;'=>'△',
|
|
'xvee;'=>'⋁',
|
|
'xwedge;'=>'⋀',
|
|
'yacute'=>'ý',
|
|
'yacute;'=>'ý',
|
|
'yacy;'=>'я',
|
|
'ycirc;'=>'ŷ',
|
|
'ycy;'=>'ы',
|
|
'yen'=>'¥',
|
|
'yen;'=>'¥',
|
|
'yfr;'=>'픶',
|
|
'yicy;'=>'ї',
|
|
'yopf;'=>'핪',
|
|
'yscr;'=>'퓎',
|
|
'yucy;'=>'ю',
|
|
'yuml'=>'ÿ',
|
|
'yuml;'=>'ÿ',
|
|
'zacute;'=>'ź',
|
|
'zcaron;'=>'ž',
|
|
'zcy;'=>'з',
|
|
'zdot;'=>'ż',
|
|
'zeetrf;'=>'ℨ',
|
|
'zeta;'=>'ζ',
|
|
'zfr;'=>'픷',
|
|
'zhcy;'=>'ж',
|
|
'zigrarr;'=>'⇝',
|
|
'zopf;'=>'핫',
|
|
'zscr;'=>'퓏',
|
|
'zwj;'=>'',
|
|
'zwnj;'=>'');
|
|
|
|
protected static $entityReplacementTable=array(0x0D => "\n", # 0x000A LINE FEED (LF)
|
|
0x80 => "€", # 0x20AC EURO SIGN
|
|
0x81 => "\xEF\xBF\xBD", # 0xFFFD REPLACEMENT CHARACTER
|
|
0x82 => "‚", # 0x201A SINGLE LOW-9 QUOTATION MRK
|
|
0x83 => "ƒ", # 0x0192 LATIN SMALL LETTER F WITH HOOK
|
|
0x84 => "„", # 0x201E DOUBLE LOW-9 QUOTATION MARK
|
|
0x85 => "…", # 0x2026 HORIZONTAL ELLIPSIS
|
|
0x86 => "†", # 0x2020 DAGGER
|
|
0x87 => "‡", # 0x2021 DOUBLE DAGGER
|
|
0x88 => "ˆ", # 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
|
|
0x89 => "‰", # 0x2030 PER MILLE SIGN
|
|
0x8A => "Š", # 0x0160 LATIN CAPITAL LETTER S WITH CARON
|
|
0x8B => "‹", # 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
|
0x8C => "Œ", # 0x0152 LATIN CAPITAL LIGATURE OE
|
|
0x8D => "\xEF\xBF\xBD", # 0xFFFD REPLACEMENT CHARACTER
|
|
0x8E => "Ž", # 0x017D LATIN CAPITAL LETTER Z WITH CARON
|
|
0x8F => "\xEF\xBF\xBD", # 0xFFFD REPLACEMENT CHARACTER
|
|
0x90 => "\xEF\xBF\xBD", # 0xFFFD REPLACEMENT CHARACTER
|
|
0x91 => "‘", # 0x2018 LEFT SINGLE QUOTATION MARK
|
|
0x92 => "’", # 0x2019 RIGHT SINGLE QUOTATION MARK
|
|
0x93 => "“", # 0x201C LEFT DOUBLE QUOTATION MARK
|
|
0x94 => "”", # 0x201D RIGHT DOUBLE QUOTATION MARK
|
|
0x95 => "•", # 0x2022 BULLET
|
|
0x96 => "–", # 0x2013 EN DASH
|
|
0x97 => "—", # 0x2014 EM DASH
|
|
0x98 => "˜", # 0x02DC SMALL TILDE
|
|
0x99 => "™", # 0x2122 TRADE MARK SIGN
|
|
0x9A => "š", # 0x0161 LATIN SMALL LETTER S WITH CARON
|
|
0x9B => "›", # 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
|
0x9C => "œ", # 0x0153 LATIN SMALL LIGATURE OE
|
|
0x9D => "\xEF\xBF\xBD", # 0xFFFD REPLACEMENT CHARACTER
|
|
0x9E => "ž", # 0x017E LATIN SMALL LETTER Z WITH CARON
|
|
0x9F => "Ÿ" # 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
|
);
|
|
|
|
# Used by some insertion modes to
|
|
# return to the previous insertion mode.
|
|
protected static $oMode=null;
|
|
|
|
# Current integer byte position.
|
|
protected static $pointer=0;
|
|
|
|
# Toggle used by the tree builder to turn quirks mode on.
|
|
# Can either be true, false, or 'limited'.
|
|
protected static $quirksMode=false;
|
|
|
|
# Elements that have special processing instructions. Used by the tree
|
|
# builder.
|
|
protected static $specialElements=array('html' => array('address','applet','area','article','aside',
|
|
'base','basefont','bgsound','blockquote',
|
|
'body','br','button','caption','center','col',
|
|
'colgroup','command','dd','details','dir','div',
|
|
'dl','dt','embed','fieldset','figcaption','figure',
|
|
'footer','form','frame','frameset','h1','h2','h3',
|
|
'h4','h5','h6','head','header','hgroup','hr','html',
|
|
'iframe','img','input','isindex','li','link',
|
|
'listing','marquee','main','menu','meta','nav','noembed',
|
|
'noframes','noscript','object','ol','p','param',
|
|
'plaintext','pre','script','section','select',
|
|
'style','summary','table','tbody','td','textarea',
|
|
'tfoot','th','thead','title','tr','ul','wbr','xmp',
|
|
'#document','#document-fragment'),
|
|
'mathml' => array('mi','mo','mn','ms','mtext','annotation-xml'),
|
|
'svg' => array('foreignObject','desc','title'));
|
|
|
|
# Stack of open elements.
|
|
protected static $stack=array();
|
|
|
|
# Size of static::$stack.
|
|
protected static $stackSize=0;
|
|
|
|
# Controls the primary operation of the tokenizer.
|
|
protected static $state='data';
|
|
|
|
# Current non-emitted token.
|
|
protected static $token=array();
|
|
|
|
# The last open element in the stack.
|
|
protected static $currentNode=null;
|
|
|
|
# The name of the last open element in the stack.
|
|
protected static $currentNodeName=null;
|
|
|
|
# Bunches of reusable characters.
|
|
const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
|
|
const UPPER_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
const LOWER_ALPHA = 'abcdefghijklmnopqrstuvwxyz';
|
|
const DIGIT = '0123456789';
|
|
const HEX = '0123456789ABCDEFabcdef';
|
|
const WHITESPACE = "\t\n\x0c ";
|
|
# Regex used when selecting next, previous, etc. non-whitespace text nodes and
|
|
# when collapsing whitespace when pretty printing in HTML5::serialize().
|
|
const WHITESPACEREGEX = '/^[ \t\n\r\x0c\x85
]+$/S';
|
|
|
|
# Parses the HTML document and returns a DOMDocument.
|
|
# @param $data The string data to parse.
|
|
static function parse($data)
|
|
{
|
|
# Set the error handler.
|
|
set_error_handler(array(__CLASS__,'errorHandler'),error_reporting());
|
|
|
|
# If there's no input data send a fatal error.
|
|
if(!is_string($data))
|
|
return static::fatalError('string expected',__METHOD__,gettype($data));
|
|
|
|
# Process the input stream.
|
|
static::processInputStream($data);
|
|
|
|
//while(static::tokenize($data)===true){}
|
|
static::tokenize($data);
|
|
|
|
# Reset the class.
|
|
static::$active=array();
|
|
static::$activeSize=0;
|
|
static::$context=null;
|
|
static::$data=null;
|
|
static::$EOF=0;
|
|
static::$buffer='';
|
|
static::$attributenamebuffer='';
|
|
static::$form=null;
|
|
static::$framesetOk=true;
|
|
static::$head=null;
|
|
static::$pendingTableCharacterTokens=array();
|
|
static::$mode='initial';
|
|
static::$oMode=null;
|
|
static::$pointer=0;
|
|
static::$quirksMode=false;
|
|
static::$stack=array();
|
|
static::$stackSize=0;
|
|
static::$state='data';
|
|
static::$token=array();
|
|
static::$currentNode=null;
|
|
static::$currentNodeName=null;
|
|
|
|
restore_error_handler();
|
|
|
|
# Fix the DOM before outputting.
|
|
return static::fixDOM();
|
|
}
|
|
|
|
static function parseFragment($data,$dom=null,$context=null)
|
|
{
|
|
# If the provided DOM is null then any context element would return errors because
|
|
# of its nonexistence in the DOM. Prevent that by nullifying the context.
|
|
if(is_null($dom))
|
|
$context=null;
|
|
|
|
# Create a new Document node, and mark it as being an HTML document.
|
|
# If a DOMDocument isn't supplied then create one.
|
|
static::$DOM=($dom->nodeType==XML_DOCUMENT_NODE) ? $dom : DOMImplementation::createDocument();
|
|
static::$DOMFragment=static::$DOM->createDocumentFragment();
|
|
# If there is a context element, and the Document of the context element is in
|
|
# quirks mode, then let the Document be in quirks mode. Otherwise, if there is a
|
|
# context element, and the Document of the context element is in limited-quirks
|
|
# mode, then let the Document be in limited-quirks mode. Otherwise, leave the
|
|
# Document in no-quirks mode.
|
|
|
|
# Cannot check whether the context element is in quirks mode. The default
|
|
# value for static::$quirksMode is false anyway.
|
|
|
|
# DEVIATION: The spec's version of parsing fragments isn't remotely useful in the
|
|
# context this library is intended for use in. This implementation uses a
|
|
# DOMDocumentFragment for inserting nodes into. There's no need to have a
|
|
# different process for when there isn't a context. There will always be one.
|
|
if(is_null($context))
|
|
{
|
|
$context=static::$DOMFragment;
|
|
# With a document fragment the state will always be 'data'.
|
|
static::$state='data';
|
|
}
|
|
else
|
|
{
|
|
# Change the tokenization stage based upon what the context element is.
|
|
$name=$context->nodeName;
|
|
if($name=='title' || $name=='textarea')
|
|
static::$state='RCDATA';
|
|
elseif($name=='style' || $name=='xmp' || $name=='iframe' || $name=='noembed' || $name=='noframes')
|
|
static::$state='RAWTEXT';
|
|
elseif($name=='script')
|
|
static::$state='script';
|
|
elseif($name=='plaintext')
|
|
static::$state='plaintext';
|
|
else
|
|
static::$state='data';
|
|
}
|
|
|
|
# Create a new HTML parser, and associate it with the just created Document node.
|
|
static::$fragment=true;
|
|
static::$context=$context;
|
|
|
|
# DEVIATION: This implementation uses a DOMDocumentFragment for inserting nodes
|
|
# into. There's no need to make a dummy html element.
|
|
|
|
# Push the context onto the stack, so it can be referenced as the context element.
|
|
static::stackPush($context);
|
|
# Reset the parser's insertion mode appropriately.
|
|
static::resetInsertionMode();
|
|
|
|
# Set the parser's form element pointer to the nearest node to the context element
|
|
# that is a form element (going straight up the ancestor chain, and including the
|
|
# element itself, if it is a form element), or, if there is no such form element,
|
|
# to null.
|
|
static::$form=($context && static::hasAncestor('form',$context)) ? $context : null;
|
|
|
|
# Place into the input stream for the HTML parser just created the input. The
|
|
# encoding confidence is irrelevant. Start the parser and let it run until it has
|
|
# consumed all the characters just inserted into the input stream.
|
|
# NOTE: The encoding confidence is ignored because everything is converted to
|
|
# UTF-8.
|
|
|
|
static::parse($data);
|
|
|
|
# If there is a context element, return the child nodes of root, in tree order.
|
|
# DEVIATION: Returns a document fragment instead.
|
|
$output=static::$DOMFragment;
|
|
static::$DOMFragment=null;
|
|
static::$fragment=false;
|
|
static::$context=null;
|
|
|
|
return $output;
|
|
}
|
|
|
|
# Fix id attributes and join adjacent text nodes. To be used after the DOM is
|
|
# manipulated and before outputting.
|
|
protected static function fixDOM($dom=null)
|
|
{
|
|
if(!$dom)
|
|
$dom=&static::$DOM;
|
|
|
|
# Fix id attributes so they may be selected by the DOM.
|
|
if(!static::$fragment) $dom->relaxNGValidateSource(static::$relaxNG);
|
|
|
|
# Normalize the document before outputting.
|
|
$dom->normalize();
|
|
|
|
$output=$dom;
|
|
static::$DOM=null;
|
|
|
|
return $output;
|
|
}
|
|
|
|
protected static function tokenize($data)
|
|
{
|
|
while(true)
|
|
{
|
|
if(static::$debug)
|
|
echo "state: ".static::$state."\n";
|
|
|
|
switch(static::$state)
|
|
{
|
|
case 'data':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='&')
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>static::consumeEntity()));
|
|
}
|
|
elseif($char=='<')
|
|
static::$state='tag open';
|
|
elseif($char===false)
|
|
{
|
|
static::emitToken(array('type'=>'eof')); # EOF
|
|
return false;
|
|
}
|
|
elseif($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('&<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RCDATA':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='&')
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>static::consumeEntity()));
|
|
}
|
|
elseif($char=='<')
|
|
static::$state='RCDATA less-than sign';
|
|
elseif($char===false)
|
|
{
|
|
static::emitToken(array('type'=>'eof')); # EOF
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('&<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RAWTEXT':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='<')
|
|
static::$state='RAWTEXT less-than sign';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::emitToken(array('type'=>'eof'));
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data':
|
|
{
|
|
$char=static::consume();
|
|
if($char=='<')
|
|
static::$state='script data less-than sign';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::emitToken(array('type'=>'eof'));
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'PLAINTEXT':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char===false) # EOF
|
|
{
|
|
static::emitToken(array('type'=>'eof'));
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'tag open':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='!')
|
|
static::$state='markup declaration open';
|
|
elseif($char=='/')
|
|
static::$state='end tag open';
|
|
elseif(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token=array('type'=>'start tag',
|
|
'name'=>strtolower($char));
|
|
|
|
static::$state='tag name';
|
|
}
|
|
elseif($char=='?')
|
|
{
|
|
static::parseError('tag name expected','?');
|
|
static::$state='bogus comment';
|
|
}
|
|
else
|
|
{
|
|
if($char!==false)
|
|
static::parseError('tag name expected',$char);
|
|
else
|
|
static::parseError('unexpected eof tag name');
|
|
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'end tag open':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token=array('type'=>'end tag',
|
|
'name'=>strtolower($char));
|
|
|
|
static::$state='tag name';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('tag name expected','>');
|
|
static::$state='data';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof tag name');
|
|
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'</'));
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('tag name expected',$char);
|
|
static::$state='bogus comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'tag name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof tag name');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital letters.
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::$token['name'].=strtolower($char).static::consumeUntil(static::WHITESPACE."/>");
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RCDATA less-than sign':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='/')
|
|
{
|
|
static::$buffer='';
|
|
static::$state='RCDATA end tag open';
|
|
}
|
|
else
|
|
{
|
|
static::$state='RCDATA';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RCDATA end tag open':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token=array('type'=>'end tag',
|
|
'name'=>strtolower($char));
|
|
static::$buffer.=$char;
|
|
static::$state='RCDATA end tag name';
|
|
}
|
|
else
|
|
{
|
|
static::$state='RCDATA';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'</'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RCDATA end tag name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token['name'].=strtolower($char);
|
|
static::$buffer.=$char;
|
|
}
|
|
# If the current token is an appropriate end tag token.
|
|
# Optimization. MUCH faster to check this first.
|
|
elseif(static::$token['name']==static::$currentNodeName)
|
|
{
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::$state='RCDATA';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>"</".static::$buffer));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RAWTEXT less-than sign':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='/')
|
|
{
|
|
static::$buffer='';
|
|
static::$state='RAWTEXT end tag open';
|
|
}
|
|
else
|
|
{
|
|
static::$state='RAWTEXT';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RAWTEXT end tag open':
|
|
{
|
|
$char=static::consume();
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
# Optimization. Consume as many alpha characters as possible.
|
|
static::$token=array('type'=>'end tag',
|
|
'name'=>strtolower($char.static::consumeWhile(static::ALPHA)));
|
|
static::$buffer.=$char;
|
|
static::$state='RAWTEXT end tag name';
|
|
}
|
|
else
|
|
{
|
|
static::$state='RAWTEXT';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'</'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'RAWTEXT end tag name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
# Optimization. Consume as many alpha characters as possible.
|
|
static::$token['name'].=strtolower($char.static::consumeWhile(static::ALPHA));
|
|
static::$buffer.=$char;
|
|
}
|
|
# If the current token is an appropriate end tag token.
|
|
# Optimization. MUCH faster to check this first.
|
|
elseif(static::$token['name']==static::$currentNodeName)
|
|
{
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::$state='RAWTEXT';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>"</".static::$buffer));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data less-than sign':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='/')
|
|
{
|
|
static::$buffer='';
|
|
static::$state='script data end tag open';
|
|
}
|
|
elseif($char=='!')
|
|
{
|
|
static::$state='script data escape start';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<!'));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data end tag open':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token=array('type'=>'end tag',
|
|
'name'=>strtolower($char));
|
|
static::$buffer.=$char;
|
|
static::$state='script data end tag name';
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'</'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data end tag name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
static::$token['name'].=strtolower($char);
|
|
static::$buffer.=$char;
|
|
}
|
|
# If the current token is an appropriate end tag token.
|
|
# Optimization. MUCH faster to check this first.
|
|
elseif(static::$token['name']==static::$currentNodeName)
|
|
{
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>"</".static::$buffer));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escape start':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data escape start dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escape start dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data escaped dash dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data escaped dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
elseif($char=='<')
|
|
static::$state='script data escaped less-than sign';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('-<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data escaped dash dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
elseif($char=='<')
|
|
static::$state='script data escaped less-than sign';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped dash dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
elseif($char=='<')
|
|
static::$state='script data escaped less-than sign';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='script data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'>'));
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped less-than sign':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='/')
|
|
{
|
|
static::$buffer='';
|
|
static::$state='script data escaped end tag open';
|
|
}
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
elseif(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
static::$buffer=strtolower($char);
|
|
static::$state='script data double escape start';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'.$char));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped end tag open':
|
|
{
|
|
$char=static::consume();
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
static::$token=array('type'=>'end tag',
|
|
'name'=>strtolower($char));
|
|
static::$buffer.=$char;
|
|
static::$state='script data escaped end tag name';
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'</'));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data escaped end tag name':
|
|
{
|
|
$char=static::consume();
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
if(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
static::$token['name'].=strtolower($char);
|
|
static::$buffer.=$char;
|
|
}
|
|
# If the current token is an appropriate end tag token.
|
|
# Optimization. MUCH faster to check this first.
|
|
elseif(static::$token['name']==static::$currentNodeName)
|
|
{
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>"</".static::$buffer));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escape start':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ' || $char=='/' || $char=='>')
|
|
{
|
|
static::$state=(static::$buffer=='script') ? 'script data double escaped' : 'script data escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
elseif(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
# Go ahead and consume everything that's ASCII alpha so this doesn't have to
|
|
# repeatedly loop back.
|
|
$char.=static::consumeWhile(static::ALPHA);
|
|
|
|
# Append the lowercase version to the buffer.
|
|
static::$buffer.=strtolower($char);
|
|
# Emit upper and lower as character tokens.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data escaped';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escaped':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data double escaped dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
elseif($char=='<')
|
|
{
|
|
static::$state='script data double escaped less-than sign';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof double escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char.static::consumeUntil('-<')));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escaped dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$state='script data double escaped dash dash';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
elseif($char=='<')
|
|
{
|
|
static::$state='script data double escaped less-than sign';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof double escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data double escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escaped dash dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'-'));
|
|
}
|
|
elseif($char=='<')
|
|
{
|
|
static::$state='script data double escaped less-than sign';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'<'));
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='script data';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'>'));
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof double escaped script data');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data double escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escape less-than sign':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='/')
|
|
{
|
|
static::$buffer='';
|
|
static::$state='script data double escape end';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>'/'));
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data double escaped';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'script data double escape end':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ' || $char=='/' || $char=='>')
|
|
{
|
|
static::$state=(static::$buffer=='script') ? 'script data escaped' : 'script data double escaped';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
}
|
|
# Faster to just strtolower everything than to check separately
|
|
# for capital and lowercase.
|
|
elseif(ctype_alpha($char)) # [A-Za-z]
|
|
{
|
|
static::$token['name'].=strtolower($char.static::consumeWhile(static::ALPHA));
|
|
static::$buffer.=$char;
|
|
}
|
|
else
|
|
{
|
|
static::$state='script data double escaped';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before attribute name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
# Faster to use ctype_upper than < & >.
|
|
elseif(ctype_upper($char)) # [A-Z]
|
|
{
|
|
static::$attributenamebuffer=strtolower($char.static::consumeWhile(static::UPPER_ALPHA));
|
|
static::$state='attribute name';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute name');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
elseif($char=='"' || $char=="'" || $char=='<' || $char=='=')
|
|
static::parseError('attribute name expected',$char);
|
|
else
|
|
{
|
|
# Optimization that makes checking for attribute name validity simpler.
|
|
static::$attributenamebuffer=$char;
|
|
static::$state='attribute name';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'attribute name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
# The spec states to check the validity of the attribute name before
|
|
# leaving the attribute name state or before emitting a token. Since
|
|
# in this implementation the attribute name is stored in a buffer it's
|
|
# only added if it is valid.
|
|
|
|
# Conceded it was best to check the validity of the attribute name
|
|
# within each if statement. Any other method was either much slower
|
|
# or too cumbersome. It's repetitive code, but oh well.
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
{
|
|
if(isset(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::parseError('attribute exists',static::$attributenamebuffer);
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer]=null;
|
|
|
|
static::$state='after attribute name';
|
|
}
|
|
elseif($char=='/')
|
|
{
|
|
if(isset(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::parseError('attribute exists',static::$attributenamebuffer);
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer]=null;
|
|
|
|
static::$state='self-closing start tag';
|
|
}
|
|
elseif($char=='=')
|
|
{
|
|
if(isset(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::parseError('attribute exists',static::$attributenamebuffer);
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer]=null;
|
|
|
|
static::$state='before attribute value';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
if(isset(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::parseError('attribute exists',static::$attributenamebuffer);
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer]=null;
|
|
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
# Faster to use ctype_upper than < & >.
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
elseif(ctype_upper($char)) # [A-Z]
|
|
static::$attributenamebuffer.=strtolower($char.static::consumeUntil(static::WHITESPACE."/=>\"'<"));
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute name');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
if($char=='"' || $char=="'" || $char=='<')
|
|
static::parseError('attribute name expected',$char);
|
|
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::$attributenamebuffer.=$char.static::consumeUntil(static::WHITESPACE."/=>\"'<".static::UPPER_ALPHA);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after attribute name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='=')
|
|
static::$state='before attribute value';
|
|
elseif($char=='>')
|
|
{
|
|
if(isset(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::parseError('attribute exists',static::$attributenamebuffer);
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer]=null;
|
|
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute value tag end');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
if($char=='"' || $char=="'" || $char=='<')
|
|
static::parseError('attribute value tag end expected',$char);
|
|
|
|
static::$attributenamebuffer=$char;
|
|
static::$state='attribute name';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before attribute value':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='"')
|
|
static::$state='attribute value (double-quoted)';
|
|
elseif($char=='&')
|
|
{
|
|
static::$state='attribute value (unquoted)';
|
|
static::unconsume();
|
|
}
|
|
elseif($char=="'")
|
|
static::$state='attribute value (single-quoted)';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('attribute value expected',$char);
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute value');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
if($char=='<' || $char=='=' || $char=='`')
|
|
static::parseError('attribute value expected',$char);
|
|
|
|
static::$token['attributes'][static::$attributenamebuffer].=$char;
|
|
static::$state='attribute value (unquoted)';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'attribute value (double-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
if($char=='"')
|
|
{
|
|
static::$state='after attribute value (quoted)';
|
|
|
|
# Set the attribute name to an empty string instead of null.
|
|
$currentAttribute=static::$token['attributes'][static::$attributenamebuffer];
|
|
if(is_null(static::$token['attributes'][static::$attributenamebuffer]))
|
|
static::$token['attributes'][static::$attributenamebuffer]='';
|
|
}
|
|
|
|
# Instead of going to a separate state to consume the reference then
|
|
# returning back to the this state just do it all here. Quicker.
|
|
# Performs the actions of the 'character reference in attribute value'
|
|
# state.
|
|
elseif($char=='&')
|
|
static::$token['attributes'][static::$attributenamebuffer].=static::consumeEntity('"',true);
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('double-quoted attribute value expected eof');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer].=$char.static::consumeUntil('"&');
|
|
}
|
|
break;
|
|
|
|
case 'attribute value (single-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="'")
|
|
{
|
|
static::$state='after attribute value (quoted)';
|
|
|
|
# Set the attribute name to an empty string instead of null.
|
|
static::$token['attributes'][static::$attributenamebuffer]='';
|
|
}
|
|
# Instead of going to a separate state to consume the reference then
|
|
# returning back to the this state just do it all here. Quicker.
|
|
# Performs the actions of the 'character reference in attribute value'
|
|
# state.
|
|
elseif($char=='&')
|
|
static::$token['attributes'][static::$attributenamebuffer].=static::consumeEntity("'",true);
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('single-quoted attribute value expected eof');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['attributes'][static::$attributenamebuffer].=$char.static::consumeUntil("'&");
|
|
}
|
|
break;
|
|
|
|
case 'attribute value (unquoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
# Instead of going to a separate state to consume the reference then
|
|
# returning back to the this state just do it all here. Quicker.
|
|
# Performs the actions of the 'character reference in attribute value'
|
|
# state.
|
|
elseif($char=='&')
|
|
static::$token['attributes'][static::$attributenamebuffer].=static::consumeEntity('>',true);
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof unquoted attribute value');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
if($char=='"' || $char=="'" || $char=='<' || $char=='=' || $char=='`')
|
|
static::parseError('unquoted attribute value expected',$char);
|
|
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::$token['attributes'][static::$attributenamebuffer].=$char.static::consumeUntil(static::WHITESPACE."&\"'<>=`");
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after attribute value (quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before attribute name';
|
|
elseif($char=='/')
|
|
static::$state='self-closing start tag';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute name tag end');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('attribute name tag end expected',$char);
|
|
static::$state='before attribute name';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'self-closing start tag':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='>')
|
|
{
|
|
static::$token['selfClosing']=true;
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof attribute name tag end');
|
|
static::$state='data';
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('attribute name tag end expected',$char);
|
|
static::$state='before attribute name';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'bogus comment':
|
|
{
|
|
# Consume every character up to and including the first greater than sign.
|
|
# Data for token contains the character which caused the state machine to
|
|
# switch into the bogus comment state, in other words the last character
|
|
# within $char. Data then includes the characters except the trailing '>'.
|
|
$char=$char.static::consumeUntil('>'); # Consumes everything to '>'.
|
|
$check=static::consume(); # Consumes the greater than sign.
|
|
|
|
# If not EOF emit comment token. If EOF emit empty comment token,
|
|
# switch to the data state, and unconsume the character.
|
|
if($check!==false)
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'comment',
|
|
'data'=>$char));
|
|
}
|
|
else
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'comment',
|
|
'data'=>''));
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'markup declaration open':
|
|
{
|
|
# If the next 2 characters are -- consume those characters, create a
|
|
# comment token, and switch to the comment start state.
|
|
|
|
if(static::peek(2)=='--')
|
|
{
|
|
static::consume(2);
|
|
static::$token=array('type'=>'comment',
|
|
'data'=>'');
|
|
static::$state='comment start';
|
|
}
|
|
# Otherwise if the next 7 characters case-insensitively equal 'doctype'
|
|
# then consume those 7 characters and switch to the doctype state.
|
|
else if(strtolower(static::peek(7))=='doctype')
|
|
{
|
|
static::consume(7);
|
|
static::$state='DOCTYPE';
|
|
}
|
|
# Otherwise if the last open element in the stack is not in the HTML
|
|
# namespace and the next seven characters are a case-sensitive match
|
|
# for the string "[CDATA[" then consume those characters and switch
|
|
# to the CDATA section state.
|
|
|
|
# TODO: After tree building is implemented check for namespaces here.
|
|
elseif(static::peek(7)=='[CDATA[')
|
|
{
|
|
static::consume(7);
|
|
static::$state='CDATA section';
|
|
}
|
|
# Otherwise trigger a parse error. Switch to the bogus comment state.
|
|
else
|
|
{
|
|
static::parseError('doctype dashes cdata expected',$char);
|
|
static::$state='bogus comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'comment start':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
static::$state='comment start dash';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('comment expected','>');
|
|
static::$token['data'].='>';
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$token['data'].=$char;
|
|
static::$state='comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'comment start dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
static::$state='comment end';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('comment expected','>');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$token['data'].="-".$char;
|
|
static::$state='comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'comment':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
static::$state='comment end dash';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['data'].=$char.static::consumeUntil('-');
|
|
}
|
|
break;
|
|
|
|
case 'comment end dash':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
static::$state='comment end';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$token['data'].="-".$char;
|
|
static::$state='comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'comment end':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char=='!')
|
|
{
|
|
static::parseError('comment end expected','!');
|
|
static::$state='comment end bang';
|
|
}
|
|
elseif($char=='-')
|
|
{
|
|
static::parseError('comment end expected','-');
|
|
static::$token['data'].='-';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment end');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('comment end expected',$char);
|
|
static::$token['data'].="--".$char;
|
|
static::$state='comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'comment end bang':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='-')
|
|
{
|
|
static::$token['data'].="--!";
|
|
static::$state='comment end dash';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof comment end');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::$token['data'].="--!".$char;
|
|
static::$state='comment';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before DOCTYPE name';
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype name');
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Spec states to trigger a parse error here, but it's unnecessary since
|
|
# the same damn error's going to be triggered in the 'before DOCTYPE name'
|
|
# state.
|
|
static::$state='before DOCTYPE name';
|
|
static::unconsume();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before DOCTYPE name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('DOCTYPE name expected','>');
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'DOCTYPE',
|
|
'quirksMode'=>true));
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype name');
|
|
static::$state='data';
|
|
static::emitToken(array('type'=>'DOCTYPE',
|
|
'quirksMode'=>true));
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Optimization. Faster to strtolower everything than to check
|
|
# for capital letters first as it takes less time to change the
|
|
# case than it does to check for it.
|
|
static::$token=array('type'=>'DOCTYPE',
|
|
'name'=>strtolower($char));
|
|
|
|
static::$state='DOCTYPE name';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='after DOCTYPE name';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype name');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Faster to strtolower everything than to check
|
|
# for capital letters first as it takes less time to change the
|
|
# case than it does to check for it.
|
|
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
# Strtolower that, too.
|
|
else
|
|
static::$token['name'].=strtolower($char.static::consumeUntil(static::WHITESPACE.'>'));
|
|
}
|
|
break;
|
|
|
|
case 'after DOCTYPE name':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false)
|
|
{
|
|
static::parseError('unexpected eof doctype keyword end tag');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
# Optimization. More times than not there's not going to be 'publicID' here, so
|
|
# checking just the current input character first is quicker in most cases.
|
|
if(strtolower($char)=='p')
|
|
{
|
|
if(strtolower($char.static::peek(5))=='public')
|
|
{
|
|
static::consume(5);
|
|
static::$state='after DOCTYPE public keyword';
|
|
}
|
|
}
|
|
# Optimization. More times than not there's not going to be 'systemID' here, so
|
|
# checking just the current input character first is quicker in most cases.
|
|
elseif(strtolower($char)=='s')
|
|
{
|
|
if(strtolower($char.static::peek(5))=='systemID')
|
|
{
|
|
static::consume(5);
|
|
static::$state='after DOCTYPE system keyword';
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype keyword end tag expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after DOCTYPE public keyword':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before DOCTYPE public identifier';
|
|
elseif($char=='"')
|
|
{
|
|
static::parseError('doctype public identifier expected','"');
|
|
static::$token['publicID']="";
|
|
static::$state='DOCTYPE public identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::parseError('doctype public identifier expected',"'");
|
|
static::$token['publicID']="";
|
|
static::$state='DOCTYPE public identifier (single-quoted)';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('doctype public identifier expected','>');
|
|
static::$state='data';
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype public identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype public identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before DOCTYPE public identifier':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='"')
|
|
{
|
|
static::$token['publicID']="";
|
|
static::$state='DOCTYPE public identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::$token['publicID']="";
|
|
static::$state='DOCTYPE public identifier (single-quoted)';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('doctype public identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype public identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype public identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE public identifier (double-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='"')
|
|
static::$state='after DOCTYPE public identifier';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('double-quoted doctype public identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype public identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['publicID'].=$char.static::consumeUntil('">');
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE public identifier (single-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="'")
|
|
static::$state='after DOCTYPE public identifier';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('single-quoted doctype public identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('single-quoted doctype public identifier expected eof');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['publicID'].=$char.static::consumeUntil("'>");
|
|
}
|
|
break;
|
|
|
|
case 'after DOCTYPE public identifier':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='between DOCTYPE public and system identifiers';
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char=='"')
|
|
{
|
|
static::parseError('doctype system identifier expected','"');
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::parseError('doctype system identifier expected',"'");
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (single-quoted)';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype system identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype system identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'between DOCTYPE public and system identifiers':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char=='"')
|
|
{
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (single-quoted)';
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof doctype system identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype system identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after DOCTYPE system keyword':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
static::$state='before DOCTYPE system identifier';
|
|
elseif($char=='"')
|
|
{
|
|
static::parseError('doctype system identifier expected','"');
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::parseError('doctype system identifier expected',"'");
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (single-quoted)';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('doctype system identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false)
|
|
{
|
|
static::parseError('unexpected eof DOCTYPE system identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype system identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before DOCTYPE system identifier':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='"')
|
|
{
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (double-quoted)';
|
|
}
|
|
elseif($char=="'")
|
|
{
|
|
static::$token['systemID']="";
|
|
static::$state='DOCTYPE system identifier (single-quoted)';
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('doctype system identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof DOCTYPE system identifier');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('doctype system identifier expected',$char);
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE system identifier (double-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=='"')
|
|
static::$state='after DOCTYPE system identifier';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('double-quoted doctype system identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('double-quoted doctype system identifier expected eof');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['systemID'].=$char.static::consumeUntil('">');
|
|
}
|
|
break;
|
|
|
|
case 'DOCTYPE system identifier (single-quoted)':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="'")
|
|
static::$state='after DOCTYPE system identifier';
|
|
elseif($char=='>')
|
|
{
|
|
static::parseError('single-quoted doctype system identifier expected','>');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('single-quoted doctype system identifier expected eof');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
else
|
|
static::$token['systemID'].=$char.static::consumeUntil("'>");
|
|
}
|
|
break;
|
|
|
|
case 'after DOCTYPE system identifier':
|
|
{
|
|
$char=static::consume();
|
|
|
|
if($char=="\t" || $char=="\n" || $char=="\x0c" || $char==' ')
|
|
continue;
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
elseif($char===false) # EOF
|
|
{
|
|
static::parseError('unexpected eof tag end');
|
|
static::$token['quirksMode']=true;
|
|
# NOTE: Don't want quirks, but leaving this here for the moment.
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
else
|
|
{
|
|
static::parseError('tag end expected',$char);
|
|
static::$state='bogus DOCTYPE';
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'bogus DOCTYPE':
|
|
{
|
|
# Optimization. Consume as many characters that don't match the other checked
|
|
# characters if they exist instead of looping around here again and again.
|
|
static::consumeUntil('>');
|
|
$char=static::consume();
|
|
|
|
if($char===false)
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
static::unconsume();
|
|
}
|
|
elseif($char=='>')
|
|
{
|
|
static::$state='data';
|
|
static::emitToken(static::$token);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'CDATA section':
|
|
{
|
|
# Consume every character up until the next occurrence of ']]>' or EOF.
|
|
# Emit the consumed characters except the ']]>'.
|
|
$char='';
|
|
while(true)
|
|
{
|
|
# Grab everything up until a ']' or EOF.
|
|
$char.=static::consumeUntil(']');
|
|
$temp=static::peek(3);
|
|
|
|
if($temp===false) # EOF
|
|
{
|
|
static::unconsume();
|
|
# Emit consumed characters as a character token then break out of the while loop.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
break 3;
|
|
}
|
|
elseif($temp==']]>')
|
|
{
|
|
# Emit consumed characters as a character token then break out of the while loop.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$char));
|
|
break;
|
|
}
|
|
# If ']]>' or EOF not encountered then consume the next character and start over.
|
|
else
|
|
$char.=static::consume();
|
|
}
|
|
|
|
# Lastly switch to the data state.
|
|
static::$state='data';
|
|
}
|
|
break;
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
# Method to print the DOM tree out as text.
|
|
# @param $context Context node.
|
|
# @param $options Optional options for the printer.
|
|
static function serialize($context,$options=array())
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE && $nodeType!=XML_TEXT_NODE)
|
|
{
|
|
switch($nodeType)
|
|
{
|
|
case XML_ATTRIBUTE_NODE: $nodeType='DOMAttr';
|
|
break;
|
|
case XML_CDATA_SECTION_NODE: $nodeType='DOMCdataSection';
|
|
break;
|
|
case XML_ENTITY_REF_NODE: $nodeType='DOMEntityReference';
|
|
break;
|
|
case XML_ENTITY_NODE: $nodeType='DOMEntity';
|
|
break;
|
|
case XML_PI_NODE: $nodeType='DOMProcessingInstruction';
|
|
break;
|
|
case XML_COMMENT_NODE: $nodeType='DOMComment';
|
|
break;
|
|
case XML_DOCUMENT_TYPE_NODE: $nodeType='DOMDocumentType';
|
|
break;
|
|
case XML_NOTATION: $nodeType='DOMNotation';
|
|
break;
|
|
default: $nodeType='null';
|
|
}
|
|
|
|
static::fatalError('domelement document frag expected',__METHOD__,$nodeType);
|
|
}
|
|
|
|
$attributeQuotes='"';
|
|
$prettyPrint=false;
|
|
$indentSpaces=1;
|
|
$indentStep=' ';
|
|
|
|
if(isset($options['attributeQuotes']))
|
|
{
|
|
$type=gettype($options['attributeQuotes']);
|
|
if($type!='boolean' && $type!='integer' && $type!='double' && $type!='string')
|
|
{
|
|
if($type=='object')
|
|
$type=get_class($options['attributeQuotes']);
|
|
static::fatalError('invalid option value type',__METHOD__,'attributeQuotes','string',$type);
|
|
}
|
|
|
|
$attributeQuotes=strtolower($options['attributeQuotes']);
|
|
switch($attributeQuotes)
|
|
{
|
|
case '"':
|
|
case 'double': $attributeQuotes='"';
|
|
break;
|
|
case "'":
|
|
case 'single': $attributeQuotes="'";
|
|
break;
|
|
case 'none':
|
|
case '0':
|
|
case '': $attributeQuotes='';
|
|
break;
|
|
default: static::fatalError('invalid option value',__METHOD__,'attributeQuotes',"'double','single', or 'none'",$attributeQuotes);
|
|
}
|
|
}
|
|
|
|
if(isset($options['prettyPrint']))
|
|
{
|
|
$type=gettype($options['prettyPrint']);
|
|
if($type!='boolean' && $type!='integer' && $type!='double' && $type!='string')
|
|
{
|
|
if($type=='object')
|
|
$type=get_class($options['prettyPrint']);
|
|
static::fatalError('invalid option value type',__METHOD__,'prettyPrint','boolean',$type);
|
|
}
|
|
|
|
$prettyPrint=(bool)$options['prettyPrint'];
|
|
}
|
|
|
|
if(isset($options['indentSpaces']))
|
|
{
|
|
$type=gettype($options['indentSpaces']);
|
|
if($type!='boolean' && $type!='integer' && $type!='double' && $type!='string')
|
|
{
|
|
if($type=='object')
|
|
$type=get_class($options['indentSpaces']);
|
|
static::fatalError('invalid option value type',__METHOD__,'indentSpaces','integer',$type);
|
|
}
|
|
|
|
$indentSpaces=(int)$options['indentSpaces'];
|
|
$indentStep=str_repeat(' ',$indentSpaces);
|
|
}
|
|
|
|
if($nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
{
|
|
$frag=$context->ownerDocument->createDocumentFragment();
|
|
$frag->appendChild($context->cloneNode(true));
|
|
$context=$frag;
|
|
}
|
|
|
|
return static::serializer($context,$attributeQuotes,$prettyPrint,$indentSpaces,$indentStep);
|
|
}
|
|
|
|
# Private method used recursively to serialize a document or node.
|
|
private static function serializer($context,$attributeQuotes,$prettyPrint,$indentSpaces,$indentStep)
|
|
{
|
|
static $foreignAncestor=false;
|
|
static $foreignNode=null;
|
|
static $scriptAncestor=false;
|
|
static $scriptNode=null;
|
|
|
|
if($prettyPrint)
|
|
{
|
|
static $indent='';
|
|
static $preAncestor=false;
|
|
static $preNode=null;
|
|
static $headAncestor=false;
|
|
static $headNode=null;
|
|
static $inlineWithBlockElementSiblings=false;
|
|
static $inlineWithBlockElementSiblingsParent=null;
|
|
static $foreignAncestorWithBlockElementSiblings=false;
|
|
static $inlineWithBlockElementDescendants=false;
|
|
static $inlineWithBlockElementDescendantsNode=null;
|
|
static $commentWithBlockElementSiblings=false;
|
|
static $commentWithBlockElementSiblingsParent=null;
|
|
}
|
|
|
|
if (static::$debug) {
|
|
echo "printing: ";
|
|
echo $context->nodeName;
|
|
echo "\n";
|
|
}
|
|
|
|
if($context->hasChildNodes())
|
|
{
|
|
$output="";
|
|
|
|
foreach($context->childNodes as $index=>$node)
|
|
{
|
|
if($prettyPrint)
|
|
{
|
|
$blockElement=false;
|
|
$modify=false;
|
|
}
|
|
|
|
switch($node->nodeType)
|
|
{
|
|
case XML_ELEMENT_NODE:
|
|
{
|
|
# If current node is an element in the HTML namespace, the MathML
|
|
# namespace, or the SVG namespace, then let tagname be current node's
|
|
# local name. Otherwise, let tagname be current node's qualified name.
|
|
$namespace=$node->namespaceURI;
|
|
|
|
if (static::$debug) {
|
|
echo "namespace: ";
|
|
echo $namespace;
|
|
echo "\n";
|
|
}
|
|
|
|
if($namespace=='http://www.w3.org/1998/Math/MathML' || $namespace=='http://www.w3.org/2000/svg')
|
|
{
|
|
if(!$foreignAncestor)
|
|
$foreignNode=$node;
|
|
$foreignAncestor=true;
|
|
# Using localName here because it "fixes" a bug where when manipulating the DOM
|
|
# with SVG and MathML stuff it puts default namespace prefixes in. Nasty.
|
|
$tagName=$node->localName;
|
|
}
|
|
elseif(is_null($namespace))
|
|
$tagName=$node->tagName;
|
|
else
|
|
$tagName=$node->prefix.':'.$node->tagName;
|
|
|
|
/* $colonPos = strpos($tagName, ':');
|
|
if ($colonPos !== false) {
|
|
$tagName = substr($tagName, $colonPos+1);
|
|
}*/
|
|
|
|
if(in_array($tagName,static::$scriptElements,true))
|
|
{
|
|
$scriptAncestor=true;
|
|
$scriptNode=$node;
|
|
}
|
|
|
|
if($prettyPrint)
|
|
{
|
|
if(!$preAncestor)
|
|
{
|
|
if($scriptAncestor)
|
|
$modify=true;
|
|
|
|
if(in_array($tagName,static::$preElements,true))
|
|
{
|
|
$preAncestor=true;
|
|
$modify=true;
|
|
$preNode=$node;
|
|
}
|
|
|
|
if((!$foreignAncestor && !$blockElement && (($headAncestor && in_array($tagName,static::$headBlockElements)) || in_array($tagName,static::$blockElements))))
|
|
{
|
|
$blockElement=true;
|
|
$modify=true;
|
|
}
|
|
|
|
if($headAncestor)
|
|
$modify=true;
|
|
elseif(!$headAncestor && $tagName==='head')
|
|
{
|
|
$headAncestor=true;
|
|
$headNode=$node;
|
|
$modify=true;
|
|
}
|
|
|
|
if(!$blockElement)
|
|
{
|
|
if(!$inlineWithBlockElementSiblings)
|
|
{
|
|
if(($headAncestor && static::hasSibling(static::$headBlockElements,$node)) || static::hasSibling(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$inlineWithBlockElementSiblings=true;
|
|
$inlineWithBlockElementSiblingsParent=$node->parentNode;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if($node->parentNode->isSameNode($inlineWithBlockElementSiblingsParent))
|
|
$modify=true;
|
|
elseif(($headAncestor && static::hasSibling(static::$headBlockElements,$node)) || static::hasSibling(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$inlineWithBlockElementSiblings=true;
|
|
$inlineWithBlockElementSiblingsParent=$node->parentNode;
|
|
}
|
|
else
|
|
{
|
|
$inlineWithBlockElementSiblings=false;
|
|
$inlineWithBlockElementSiblingsParent=null;
|
|
}
|
|
|
|
if(!$inlineWithBlockElementDescendants && static::hasDescendant(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$inlineWithBlockElementDescendants=true;
|
|
$inlineWithBlockElementDescendantsNode=$node;
|
|
}
|
|
}
|
|
|
|
if ($foreignAncestorWithBlockElementSiblings) {
|
|
$modify=true;
|
|
} elseif ($foreignNode && $node->isSameNode($foreignNode) && ($inlineWithBlockElementSiblings || (in_array($node->parentNode->nodeName, static::$blockElements) && $node->isSameNode(static::firstNonWhitespaceTextNodeChild($node->parentNode)) && $node->isSameNode(static::lastNonWhitespaceTextNodeChild($node->parentNode))) || static::hasSibling(static::$blockElements,$foreignNode->parentNode))) {
|
|
$modify = true;
|
|
$foreignAncestorWithBlockElementSiblings = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if($modify)
|
|
{
|
|
$output.="\n".$indent;
|
|
|
|
if($headAncestor && $tagName!=='head' && in_array($tagName,static::$headBlockElements) && !in_array(static::prevNonWhitespaceTextNodeChild($node)->nodeName,static::$headBlockElements))
|
|
$output.="\n".$indent;
|
|
}
|
|
}
|
|
|
|
$output.="<".$tagName;
|
|
|
|
if($node->hasAttributes())
|
|
{
|
|
foreach($node->attributes as $index=>$attr)
|
|
{
|
|
# For each attribute that the element has, append a U+0020 SPACE
|
|
# character, the attribute's serialized name as described below, a
|
|
# U+003D EQUALS SIGN character (=), a U+0022 QUOTATION MARK character
|
|
# ("), the attribute's value, escaped as described below in attribute
|
|
# mode, and a second U+0022 QUOTATION MARK character (").
|
|
|
|
$output.=' ';
|
|
|
|
switch($attr->namespaceURI)
|
|
{
|
|
case null: $output.=$attr->name;
|
|
break;
|
|
case 'http://www.w3.org/XML/1998/namespace': $output.='xml:'.$attr->name;
|
|
break;
|
|
case 'http://www.w3.org/2000/xmlns/': $output.=($attr->name=='xmlns') ? 'xmlns' : 'xmlns:'.$attr->name;
|
|
break;
|
|
case 'http://www.w3.org/1999/xlink': $output.='xlink:'.$attr->name;
|
|
break;
|
|
default: $output.=$attr->prefix.':'.$attr->name;
|
|
}
|
|
|
|
if($foreignAncestor==true)
|
|
$output.='="'.static::escapeString($attr->value,'"').'"';
|
|
elseif($attr->value!=$attr->name)
|
|
$output.='='.$attributeQuotes.static::escapeString($attr->value,$attributeQuotes).$attributeQuotes;
|
|
}
|
|
}
|
|
|
|
if($foreignAncestor==true && !$node->hasChildNodes())
|
|
{
|
|
$output.='/>';
|
|
if($node->isSameNode($foreignNode))
|
|
{
|
|
$foreignAncestor=false;
|
|
$foreignNode=null;
|
|
$foreignAncestorWithBlockElementSiblings=false;
|
|
}
|
|
|
|
goto serializeCleanUp;
|
|
}
|
|
|
|
# Append a U+003E GREATER-THAN SIGN character (>).
|
|
$output.='>';
|
|
|
|
if($foreignAncestor==false)
|
|
{
|
|
# If current node is an area, base, basefont, bgsound, br, col, command,
|
|
# embed, frame, hr, img, input, keygen, link, meta, param, source, track
|
|
# or wbr element, then continue on to the next child node at this point.
|
|
if(in_array($tagName,static::$selfClosingElements))
|
|
{
|
|
if($prettyPrint)
|
|
{
|
|
# Make the markup easier to read by adding additional whitespace.
|
|
if($blockElement && (($headAncestor && in_array($tagName,static::$headBlockElements)) || in_array($tagName,static::$spacedBlockElements)))
|
|
{
|
|
$nextChildName = static::nextNonWhitespaceTextNodeChild($node);
|
|
if ($nextChildName) {
|
|
$nextChildName = $nextChildName->nodeName;
|
|
|
|
if(strpos($nextChildName,'#')===false && static::lastNonWhitespaceTextNodeChild($node->parentNode)!==$node)
|
|
{
|
|
if($tagName=='h1' || $tagName=='h2' || $tagName=='h3' || $tagName=='h4' || $tagName=='h5' || $tagName=='h6')
|
|
{
|
|
if($nextChildName!='h1' && $nextChildName!='h2' && $nextChildName!='h3' && $nextChildName!='h4' && $nextChildName!='h5' && $nextChildName!='h6')
|
|
$output.="\n";
|
|
}
|
|
elseif($nextChildName!=$tagName)
|
|
$output.="\n";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
goto serializeCleanUp;
|
|
}
|
|
}
|
|
|
|
if($prettyPrint && $modify)
|
|
$indent.=$indentStep;
|
|
|
|
# Append the value of running the HTML fragment serialization algorithm
|
|
# on the current node element (thus recursing into this algorithm for
|
|
# that element), followed by a U+003C LESS-THAN SIGN character (<), a
|
|
# U+002F SOLIDUS character (/), tagname again, and finally a U+003E
|
|
# GREATER-THAN SIGN character (>).
|
|
$output.=static::serializer($node,$attributeQuotes,$prettyPrint,$indentSpaces,$indentStep);
|
|
|
|
if($prettyPrint && $modify)
|
|
{
|
|
$indent=substr($indent,0,0-$indentSpaces);
|
|
|
|
if(!$preAncestor &&
|
|
(!$foreignAncestor && ($tagName=='head' || static::hasDescendant(static::$blockElements,$node))) ||
|
|
(static::hasSibling(static::$blockElements,$node) && static::hasChild(['math','svg'], $node)) ||
|
|
($foreignAncestorWithBlockElementSiblings && static::hasDescendant(function($context)
|
|
{return ($context->nodeType==XML_ELEMENT_NODE) ? true : false;},$node))) {
|
|
$output.="\n".$indent;
|
|
}
|
|
}
|
|
|
|
$output.="</".$tagName.">";
|
|
|
|
if($prettyPrint)
|
|
{
|
|
# Make the markup easier to read by adding additional whitespace.
|
|
if($blockElement && (($headAncestor && in_array($tagName,static::$headBlockElements)) || in_array($tagName,static::$spacedBlockElements)))
|
|
{
|
|
$nextChildName = static::nextNonWhitespaceTextNodeChild($node);
|
|
if ($nextChildName) {
|
|
$nextChildName = $nextChildName->nodeName;
|
|
|
|
if(strpos($nextChildName,'#text')===false && !static::lastNonWhitespaceTextNodeChild($node->parentNode)->isSameNode($node))
|
|
{
|
|
|
|
if($tagName=='h1' || $tagName=='h2' || $tagName=='h3' || $tagName=='h4' || $tagName=='h5' || $tagName=='h6')
|
|
{
|
|
if($nextChildName!='h1' && $nextChildName!='h2' && $nextChildName!='h3' && $nextChildName!='h4' && $nextChildName!='h5' && $nextChildName!='h6')
|
|
$output.="\n";
|
|
}
|
|
elseif($nextChildName!==$tagName)
|
|
$output.="\n";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
serializeCleanUp:
|
|
if($scriptAncestor && $node->isSameNode($scriptNode))
|
|
{
|
|
$scriptAncestor=false;
|
|
$scriptNode=null;
|
|
}
|
|
|
|
if($prettyPrint)
|
|
{
|
|
if($preAncestor && $node->isSameNode($preNode))
|
|
{
|
|
$preAncestor=false;
|
|
$preNode=null;
|
|
break;
|
|
}
|
|
|
|
if(!$preAncestor)
|
|
{
|
|
if(!$foreignAncestor)
|
|
{
|
|
if($headAncestor && $node->isSameNode($headNode))
|
|
{
|
|
$headAncestor=false;
|
|
$headNode=null;
|
|
break;
|
|
}
|
|
|
|
if($inlineWithBlockElementSiblings &&
|
|
$node->parentNode!=$inlineWithBlockElementSiblingsParent)
|
|
{
|
|
$inlineWithBlockElementSiblings=false;
|
|
$inlineWithBlockElementSiblingsParent=null;
|
|
}
|
|
|
|
if($commentWithBlockElementSiblings &&
|
|
$node->parentNode!=$commentWithBlockElementSiblingsParent)
|
|
{
|
|
$commentWithBlockElementSiblings=false;
|
|
$commentWithBlockElementSiblingsParent=null;
|
|
}
|
|
|
|
if($inlineWithBlockElementDescendants && $node->isSameNode($inlineWithBlockElementDescendantsNode))
|
|
{
|
|
$inlineWithBlockElementDescendants=false;
|
|
$inlineWithBlockElementDescendantsNode=null;
|
|
}
|
|
|
|
}
|
|
elseif($node->isSameNode($foreignNode))
|
|
{
|
|
$isForeign=false;
|
|
$foreignNode=null;
|
|
$foreignAncestor=false;
|
|
$foreignAncestorWithBlockElementSiblings=false;
|
|
}
|
|
}
|
|
}
|
|
elseif($foreignAncestor && $node->isSameNode($foreignNode))
|
|
{
|
|
$isForeign=false;
|
|
$foreignNode=null;
|
|
$foreignAncestor=false;
|
|
$foreignAncestorWithBlockElementSiblings=false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case XML_TEXT_NODE:
|
|
{
|
|
$nodeData=$node->data;
|
|
|
|
if($prettyPrint && !$preAncestor && !$scriptAncestor)
|
|
{
|
|
if(($foreignAncestor || in_array($node->parentNode->nodeName,static::$blockElements)) &&
|
|
static::hasSibling(static::$blockElements,$node) &&
|
|
preg_match(static::WHITESPACEREGEX,$nodeData)>0)
|
|
continue 2;
|
|
|
|
$data=preg_replace(array('/[\n\r]/','/( ){2,}/'),array('','$1'),str_replace("\t",' ',$nodeData));
|
|
if($data=='')
|
|
continue 2;
|
|
if($data!=$nodeData)
|
|
$nodeData=$data;
|
|
}
|
|
|
|
if($scriptAncestor)
|
|
{
|
|
# If the script ancestor is a script element and the node data isn't escaped then escape the data.
|
|
$scriptNodeName=$scriptNode->nodeName;
|
|
if($scriptNodeName==='script' && strpos(trim($nodeData),'<!--')!==0)
|
|
$nodeData=static::escapeString($nodeData);
|
|
|
|
# Escape strings that look like the script node's end tag.
|
|
$endTag='</'.$scriptNodeName.'>';
|
|
if(strpos($nodeData,$endTag)!==false)
|
|
$nodeData=str_replace($endTag,'<'.$scriptNodeName.'>',$nodeData);
|
|
}
|
|
else
|
|
$nodeData=static::escapeString($nodeData);
|
|
|
|
$output.=$nodeData;
|
|
}
|
|
break;
|
|
|
|
case XML_CDATA_SECTION_NODE:
|
|
{
|
|
if($prettyPrint && !$preAncestor)
|
|
{
|
|
if(!$modify)
|
|
{
|
|
if($headAncestor || $inlineWithBlockElementSiblings || $commentWithBlockElementSiblings)
|
|
$modify=true;
|
|
elseif(static::hasSibling(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$commentWithBlockElementSiblings=true;
|
|
$commentWithBlockElementSiblingsParent=$node->parentNode;
|
|
}
|
|
}
|
|
|
|
if($modify)
|
|
$output.="\n".$indent;
|
|
}
|
|
$output.=static::escapeString($node->data);
|
|
}
|
|
break;
|
|
|
|
case XML_COMMENT_NODE:
|
|
{
|
|
if($prettyPrint && !$preAncestor)
|
|
{
|
|
if(!$modify)
|
|
{
|
|
if($headAncestor || $inlineWithBlockElementSiblings || $commentWithBlockElementSiblings)
|
|
$modify=true;
|
|
elseif(static::hasSibling(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$commentWithBlockElementSiblings=true;
|
|
$commentWithBlockElementSiblingsParent=$node->parentNode;
|
|
}
|
|
}
|
|
|
|
if($modify)
|
|
$output.="\n".$indent;
|
|
}
|
|
$output.="<!--".$node->data."-->";
|
|
}
|
|
break;
|
|
|
|
case XML_ENTITY_REF_NODE: $output.='&'.$node->nodeName.';';
|
|
break;
|
|
|
|
case XML_PI_NODE:
|
|
{
|
|
if($prettyPrint && !$preAncestor)
|
|
{
|
|
if(!$modify)
|
|
{
|
|
if($headAncestor || $inlineWithBlockElementSiblings || $commentWithBlockElementSiblings)
|
|
$modify=true;
|
|
elseif(static::hasSibling(static::$blockElements,$node))
|
|
{
|
|
$modify=true;
|
|
$commentWithBlockElementSiblings=true;
|
|
$commentWithBlockElementSiblingsParent=$node->parentNode;
|
|
}
|
|
}
|
|
|
|
if($modify)
|
|
$output.="\n".$indent;
|
|
}
|
|
$output.="<?".$node->target." ".$node->data.">";
|
|
}
|
|
break;
|
|
|
|
case XML_DOCUMENT_TYPE_NODE: $output.="<!DOCTYPE ".$node->name.">";
|
|
break;
|
|
}
|
|
}
|
|
|
|
return $output;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
public static function hasAncestor($needle,$context,&$match=null)
|
|
{
|
|
if(!$context->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
|
|
while($context=$context->parentNode)
|
|
{
|
|
hasAncestorLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasAncestorLoop;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasDescendant($needle,$context,&$match=null)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if(!$context->nodeType)
|
|
static::fatalError('domelement document frag expected',__METHOD__,$nodeType);
|
|
|
|
if(!$context->hasChildNodes())
|
|
return false;
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
$context=$context->firstChild;
|
|
do
|
|
{
|
|
//hasDescendantLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
//goto hasDescendantLoop;
|
|
}
|
|
|
|
if(static::hasDescendant($callback,$context,$match))
|
|
return true;
|
|
}
|
|
while($context=$context->nextSibling);
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasChild($needle,$context,&$match=null)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if(!$context->nodeType)
|
|
static::fatalError('domelement document frag expected',__METHOD__,$nodeType);
|
|
|
|
if(!$context->hasChildNodes())
|
|
return false;
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
$context=$context->firstChild;
|
|
if($context)
|
|
{
|
|
do
|
|
{
|
|
hasChildLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasChildLoop;
|
|
}
|
|
}
|
|
while($context=$context->nextSibling);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasChildReverse($needle,$context,&$match=null)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if(!$context->nodeType)
|
|
static::fatalError('domelement document frag expected',__METHOD__,$nodeType);
|
|
|
|
if(!$context->hasChildNodes())
|
|
return false;
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
$context=$context->lastChild;
|
|
if($context)
|
|
{
|
|
do
|
|
{
|
|
hasChildReverseLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasChildReverseLoop;
|
|
}
|
|
}
|
|
while($context=$context->previousSibling);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasSibling($needle,$context,&$match=null)
|
|
{
|
|
if(!$context->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
|
|
$original=$context;
|
|
$context=$context->parentNode->firstChild;
|
|
|
|
do
|
|
{
|
|
hasSiblingLoop:
|
|
if($context===$original)
|
|
continue;
|
|
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasSiblingLoop;
|
|
}
|
|
}
|
|
while($context=$context->nextSibling);
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasPrecedingSibling($needle,$context,&$match=null)
|
|
{
|
|
if(!$context->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
|
|
while($context=$context->previousSibling)
|
|
{
|
|
hasPrecedingSiblingLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasPrecedingSiblingLoop;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function hasFollowingSibling($context,$needle,&$match=null)
|
|
{
|
|
if(!$context->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
$callback=static::getSelectorCallback($needle,__METHOD__);
|
|
|
|
while($context=$context->nextSibling)
|
|
{
|
|
hasFollowingSiblingLoop:
|
|
$result=$callback($context);
|
|
|
|
if(is_bool($result))
|
|
{
|
|
if($result)
|
|
{
|
|
$match=$context;
|
|
return true;
|
|
}
|
|
}
|
|
elseif(is_int($result))
|
|
{
|
|
if($result)
|
|
$match=$context;
|
|
return (bool)$result;
|
|
}
|
|
elseif($result instanceof DOMNode)
|
|
{
|
|
$context=$result;
|
|
goto hasFollowingSiblingLoop;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
public static function prevNonWhiteSpaceTextNodeChild($node)
|
|
{
|
|
if(!$node->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
while($node=$node->previousSibling)
|
|
{
|
|
if($node->nodeType==XML_TEXT_NODE && preg_match(static::WHITESPACEREGEX,$node->data)>0)
|
|
continue;
|
|
|
|
return $node;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static function nextNonWhiteSpaceTextNodeChild($node)
|
|
{
|
|
if(!$node->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
while($node=$node->nextSibling)
|
|
{
|
|
if($node->nodeType==XML_TEXT_NODE && preg_match(static::WHITESPACEREGEX,$node->data)>0)
|
|
continue;
|
|
|
|
return $node;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static function lastNonWhitespaceTextNodeChild($node)
|
|
{
|
|
$nodeType=$node->nodeType;
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
{
|
|
if(!$nodeType)
|
|
static::fatalError('domelement document frag expected',__METHOD__,'null');
|
|
|
|
return false;
|
|
}
|
|
|
|
$node=$node->lastChild;
|
|
do
|
|
{
|
|
if($node->nodeType==XML_TEXT_NODE && preg_match(static::WHITESPACEREGEX,$node->data)>0)
|
|
continue;
|
|
|
|
return $node;
|
|
}
|
|
while($node=$node->previousSibling);
|
|
|
|
return null;
|
|
}
|
|
|
|
public static function firstNonWhitespaceTextNodeChild($node)
|
|
{
|
|
$nodeType=$node->nodeType;
|
|
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
{
|
|
switch($nodeType)
|
|
{
|
|
case XML_ATTRIBUTE_NODE: $nodeType='DOMAttr';
|
|
break;
|
|
case XML_TEXT_NODE: $nodeType='DOMText';
|
|
break;
|
|
case XML_CDATA_SECTION_NODE: $nodeType='DOMCdataSection';
|
|
break;
|
|
case XML_ENTITY_REF_NODE: $nodeType='DOMEntityReference';
|
|
break;
|
|
case XML_ENTITY_NODE: $nodeType='DOMEntity';
|
|
break;
|
|
case XML_PI_NODE: $nodeType='DOMProcessingInstruction';
|
|
break;
|
|
case XML_COMMENT_NODE: $nodeType='DOMComment';
|
|
break;
|
|
case XML_DOCUMENT_TYPE_NODE: $nodeType='DOMDocumentType';
|
|
break;
|
|
case XML_NOTATION: $nodeType='DOMNotation';
|
|
break;
|
|
default: $nodeType='null';
|
|
}
|
|
|
|
static::fatalError('domelement document frag expected',__METHOD__,$nodeType);
|
|
}
|
|
|
|
$node=$node->firstChild;
|
|
do
|
|
{
|
|
if($node->nodeType==XML_TEXT_NODE && preg_match(static::WHITESPACEREGEX,$node->data)>0)
|
|
continue;
|
|
|
|
return $node;
|
|
|
|
break;
|
|
}
|
|
while($node=$node->nextSibling);
|
|
|
|
return false;
|
|
}
|
|
|
|
protected static function getSelectorCallback($needle,$method=null,$type=null)
|
|
{
|
|
if(!$type)
|
|
$type=gettype($needle);
|
|
switch($type)
|
|
{
|
|
case 'boolean':
|
|
case 'integer':
|
|
case 'double':
|
|
case 'string':
|
|
{
|
|
return function($context) use($needle)
|
|
{return $context->nodeName==(string)$needle;};
|
|
}
|
|
break;
|
|
|
|
case 'array':
|
|
{
|
|
return function($context) use($needle)
|
|
{return in_array($context->nodeName,$needle);};
|
|
}
|
|
break;
|
|
|
|
case 'object':
|
|
{
|
|
if(!$needle instanceof Closure)
|
|
static::fatalError('closure expected',$method,get_class($needle));
|
|
}
|
|
|
|
case 'closure': return $needle;
|
|
break;
|
|
|
|
case 'default': static::fatalError('string array closure expected',$method,$type);
|
|
}
|
|
}
|
|
|
|
# Walk through the DOM and perform actions.
|
|
# @param $context Node to check.
|
|
# @param $callback Callback function to check with.
|
|
public static function walk($callback,$context)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
return;
|
|
|
|
$node=$context->firstChild;
|
|
if($node)
|
|
{
|
|
do
|
|
{
|
|
walkLoop:
|
|
//$next=$node->nextSibling;
|
|
$result=$callback($node);
|
|
|
|
if(!$result)
|
|
return;
|
|
elseif(!$result->isSameNode($node))
|
|
{
|
|
$node=$result;
|
|
//if($result===$next)
|
|
goto walkLoop;
|
|
}
|
|
|
|
if($node->nodeType==XML_ELEMENT_NODE)
|
|
static::walk($callback,$node);
|
|
}
|
|
while($node=$node->nextSibling);
|
|
}
|
|
}
|
|
|
|
# Walk sideways through the DOM and perform actions.
|
|
# @param $context Node to check.
|
|
# @param $callback Callback function to check with.
|
|
public static function sideWalk($callback,$context)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
static::fatalError('domdocument expected',__METHOD__);
|
|
|
|
while($node=$node->nextSibling)
|
|
{
|
|
sideWalkLoop:
|
|
$result=$callback($node);
|
|
if(!$result)
|
|
return;
|
|
elseif(!$result->isSameNode($node))
|
|
{
|
|
$node=$result;
|
|
goto sideWalkLoop;
|
|
}
|
|
}
|
|
}
|
|
|
|
# Walk through the provided node's children and perform actions.
|
|
# @param $context Node to check.
|
|
# @param $callback Callback function to check with.
|
|
public static function stroll($callback,$context)
|
|
{
|
|
$nodeType=$context->nodeType;
|
|
if($nodeType!=XML_ELEMENT_NODE && $nodeType!=XML_DOCUMENT_NODE && $nodeType!=XML_DOCUMENT_FRAG_NODE)
|
|
static::fatalError('domdocument expected',__METHOD__);
|
|
|
|
$node=$context->firstChild;
|
|
if($node)
|
|
{
|
|
do
|
|
{
|
|
strollLoop:
|
|
$result=$callback($node);
|
|
if(!$result)
|
|
return;
|
|
elseif(!$result->isSameNode($node))
|
|
{
|
|
$node=$result;
|
|
goto strollLoop;
|
|
}
|
|
}
|
|
while($node=$node->nextSibling);
|
|
}
|
|
}
|
|
|
|
public static function moonWalk($callback,$context)
|
|
{
|
|
if(!$context->nodeType)
|
|
static::fatalError('domnode expected',__METHOD__,'null');
|
|
|
|
while($context=$context->parentNode)
|
|
{
|
|
moonWalkLoop:
|
|
$result=$callback($context);
|
|
if(!$result)
|
|
return;
|
|
elseif(!$result->isSameNode($context))
|
|
{
|
|
$context=$result;
|
|
goto moonWalkLoop;
|
|
}
|
|
}
|
|
}
|
|
|
|
protected static function escapeString($string,$attrMode=false)
|
|
{
|
|
if($string=='')
|
|
return $string;
|
|
|
|
# preg_replaces are necessary because the document is unicode. There's no
|
|
# mb_str_replace, and using regular str_replaces can present problems.
|
|
$string=preg_replace(array('/&/u','/\xa0/u'),array('&',' '),$string);
|
|
//$string=str_replace(array('&',"\xa0"),array('&',' '),$string);
|
|
|
|
if($attrMode===false)
|
|
$string=preg_replace(array('/</u','/>/u'),array('<','>'),$string);
|
|
elseif($attrMode=='"')
|
|
$string=preg_replace('/"/u','"',$string);
|
|
elseif($attrMode=="'")
|
|
$string=preg_replace('/\'/u',''',$string);
|
|
elseif($attrMode==='' || is_null($attrMode))
|
|
$string=preg_replace(array('/ /u','/</u','/>/u'),array(' ','<','>'),$string);
|
|
|
|
return $string;
|
|
}
|
|
|
|
# Method to process the input stream.
|
|
# @param $data The data stream to process.
|
|
protected static function processInputStream($data)
|
|
{
|
|
# The spec states to optionally detect the character encoding and then use
|
|
# it. It also states that user agents must support at least UTF-8 and
|
|
# Windows-1252. This will only ever support UTF-8, so we will make sure the
|
|
# input will be UTF-8 before continuing.
|
|
$data=mb_convert_encoding($data,'UTF-8',mb_detect_encoding($data));
|
|
mb_internal_encoding('UTF-8');
|
|
|
|
# Remove byte order mark if present.
|
|
if(substr($data,0,3)==="\xEF\xBB\xBF")
|
|
$data=substr($data,3);
|
|
|
|
# Write errors if control or permanently undefined unicode characters are
|
|
# present. The spec states to just trigger parse errors, but I'm also
|
|
# removing them from the document. It's stupid and inefficient to leave
|
|
# them in there and work around them if they're invalid since the class
|
|
# won't work with dynamically changing documents. The spec specifies to do
|
|
# U+0001 to U+0008 in this step. I'm starting from U+0000 because NULL
|
|
# characters should be removed as well.
|
|
$count=0;
|
|
$data=preg_replace_callback('/(?:[\x00-\x08\x0B\x0E-\x1F\x7F]|\xC2[\x80-\x9F]|\xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF])|\xEF\xB7[\x90-\xAF]|\xEF\xBF[\xBE\xBF]|[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF])/',
|
|
function($matches) use(&$count)
|
|
{
|
|
$count++;
|
|
return '';
|
|
},$data);
|
|
|
|
for($loop=0;$loop<$count;$loop++)
|
|
{static::parseError('control or noncharacters');}
|
|
|
|
# Normalize line breaks. Convert CRLF and CR to LF.
|
|
# Break the document into a unicode friendly array of single characters for
|
|
# tokenization.
|
|
static::$data=preg_split('/(?<!^)(?!$)/u',str_replace(array("\r\n","\r"),"\n",$data));
|
|
|
|
# Set EOF to the string length of the document.
|
|
static::$EOF=sizeof(static::$data);
|
|
}
|
|
|
|
# Returns the next character(s). Consumes them by moving the pointer ahead a specified number of steps.
|
|
# @param $length Number of characters to grab.
|
|
# @param $consume Flag specifying whether to consume. Defaults to true.
|
|
protected static function consume($length=1)
|
|
{
|
|
if($length<=0)
|
|
static::fatalError('invalid consume length',__METHOD__);
|
|
if(static::$pointer+1>static::$EOF)
|
|
return false;
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
static::$pointer=$end;
|
|
return $output;
|
|
}
|
|
|
|
# Returns the next character(s). It does not move the pointer ahead.
|
|
protected static function peek($length=1)
|
|
{
|
|
if($length<=0)
|
|
static::fatalError('invalid peek length',__METHOD__);
|
|
if(static::$pointer+1>static::$EOF)
|
|
return false;
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
return $output;
|
|
}
|
|
|
|
# Unconsumes the current consume character.
|
|
protected static function unconsume($length=1)
|
|
{if(static::$pointer<static::$EOF) static::$pointer-=$length;}
|
|
|
|
# Finds the length of the initial segment of a string consisting entirely of
|
|
# characters contained within a given mask. Exists here because PHP's strspn
|
|
# isn't unicode friendly, and there's no mbstring alternative.
|
|
protected static function mb_strspn($match,$start=0,$length=0)
|
|
{
|
|
$output=0;
|
|
|
|
# Break the matching characters into an array of characters. Unicode friendly.
|
|
$match=preg_split('/(?<!^)(?!$)/u',$match);
|
|
while(true)
|
|
{
|
|
$char=static::$data[$start];
|
|
if($char=='')
|
|
break;
|
|
if(!in_array($char,$match))
|
|
break;
|
|
|
|
$output++;
|
|
$start++;
|
|
|
|
if($output==$length)
|
|
break;
|
|
}
|
|
return $output;
|
|
}
|
|
|
|
# Find length of initial segment not matching mask. Exists here because PHP's
|
|
# strcspn isn't unicode friendly, and there's no mbstring alternative.
|
|
protected static function mb_strcspn($match,$start=0,$length=0)
|
|
{
|
|
$output=0;
|
|
|
|
# Break the matching characters into an array of characters. Unicode friendly.
|
|
$match=preg_split('/(?<!^)(?!$)/u',$match);
|
|
while(true)
|
|
{
|
|
if (!isset(static::$data[$start]) || static::$data[$start] == '' || in_array(static::$data[$start],$match)) {
|
|
break;
|
|
}
|
|
|
|
$output++;
|
|
$start++;
|
|
|
|
if($output==$length)
|
|
break;
|
|
}
|
|
return $output;
|
|
}
|
|
|
|
protected static function consumeWhile($match,$limit=0)
|
|
{
|
|
if(static::$pointer>static::$EOF)
|
|
return false;
|
|
|
|
$length=static::mb_strspn($match,static::$pointer,$limit);
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
|
|
static::$pointer+=$length;
|
|
|
|
return $output;
|
|
}
|
|
|
|
protected static function consumeUntil($match,$limit=0)
|
|
{
|
|
if(static::$pointer>static::$EOF)
|
|
return false;
|
|
|
|
$length=static::mb_strcspn($match,static::$pointer,$limit);
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
|
|
static::$pointer+=$length;
|
|
|
|
return $output;
|
|
}
|
|
|
|
protected static function peekWhile($match,$limit=0)
|
|
{
|
|
if(static::$pointer>static::$EOF)
|
|
return false;
|
|
|
|
$length=static::mb_strspn($match,static::$pointer,$limit);
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
|
|
return $output;
|
|
}
|
|
|
|
protected static function peekUntil($match,$limit=0)
|
|
{
|
|
if(static::$pointer>static::$EOF)
|
|
return false;
|
|
|
|
$length=static::mb_strcspn($match,static::$pointer,$limit);
|
|
|
|
$output='';
|
|
$end=static::$pointer+$length;
|
|
for($loop=static::$pointer;$loop<$end;$loop++)
|
|
{$output.=static::$data[$loop];}
|
|
|
|
return $output;
|
|
}
|
|
|
|
# Consumes a character reference.
|
|
protected static function consumeEntity($allowedChar=false,$inattr=false)
|
|
{
|
|
# Grab the next character without consuming.
|
|
$char=static::peek();
|
|
|
|
# Optimization: When the spec states to return nothing this function will
|
|
# return '&' as every use of this function checks to see if it returns
|
|
# nothing then tells it to substitute an ampersand. Common sense.
|
|
|
|
# If the next character is one of: U+0009 CHARACTER TABULATION,
|
|
# U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+0020 SPACE,
|
|
# U+003C LESS-THAN, U+0026 AMPERSAND, or EOF it's not a character
|
|
# reference. Return nothing.
|
|
if($char=="\x09" || $char=="\x0A" || $char=="\x0C" || $char=="\x20" || $char=='<' || $char=='&' || $char===false || $char==$allowedChar)
|
|
return '&';
|
|
|
|
switch($char)
|
|
{
|
|
case '#':
|
|
{
|
|
# If the next character is a number sign consume it.
|
|
static::consume();
|
|
|
|
# Grab the next character without consuming.
|
|
$char=static::peek();
|
|
if($char==='x' || $char==='X')
|
|
{
|
|
# If the next character is 'x' or 'X' consume it.
|
|
static::consume();
|
|
|
|
# Consume the following as a hexadecimal number.
|
|
$number=static::consumeWhile(static::HEX);
|
|
|
|
$hex=true;
|
|
}
|
|
else
|
|
{
|
|
# Consume the following as a decimal number.
|
|
$number=static::consumeWhile(static::DIGIT);
|
|
$hex=false;
|
|
}
|
|
|
|
if($number==='' || $number===false)
|
|
{
|
|
# If nothing is matched then trigger a parse error.
|
|
static::parseError('numeric entity expected');
|
|
|
|
# Return nothing.
|
|
return '&';
|
|
}
|
|
else
|
|
{
|
|
# If the next character is a semicolon then consume it otherwise
|
|
# trigger a parse error.
|
|
$check=static::peek();
|
|
if($check==';')
|
|
static::consume();
|
|
else
|
|
static::parseError('semicolon terminator expected');
|
|
|
|
# Interpret the number as either a hexadecimal or decimal number.
|
|
$number=($hex) ? hexdec($number) : (int)$number;
|
|
|
|
# If the number is a key in the above array then trigger a parse error.
|
|
if(isset(static::$entityReplacementTable[$number]))
|
|
{
|
|
static::parseError('invalid entity');
|
|
|
|
# Return the character which corresponds to the number key in the array.
|
|
return static::$entityReplacementTable[$number];
|
|
}
|
|
elseif(($number>=0x0000 && $number<=0x0008) || $number===0x000B ||
|
|
($number>=0x000E && $number<=0x001F) ||
|
|
($number>=0x007F && $number<=0x009F) ||
|
|
($number>=0xD800 && $number<=0xDFFF) ||
|
|
($number & 0xFFFE)===0xFFFE || $number>0x10FFFF)
|
|
{
|
|
static::parseError('illegal codepoint');
|
|
|
|
# Return a replacement character.
|
|
return "\xEF\xBF\xBD";
|
|
}
|
|
|
|
# Return the character which corresponds to the numerical codepoint.
|
|
return mb_convert_encoding(pack("N",$number),'UTF8','UCS-4BE');
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
{
|
|
# Named character references.
|
|
|
|
# Grab as many alphanumeric characters as possible up until the string
|
|
# length of the longest named character reference. Calculated using:
|
|
# max(array_map('strlen',array_keys($namedrefs)));
|
|
# No need to calculate it every time as the array is static.
|
|
$char=static::peekWhile(static::DIGIT.static::ALPHA.';',32);
|
|
|
|
# Lob a character off the end of the grabbed string until a match is found.
|
|
$charArray=preg_split('/(?<!^)(?!$)/u',$char);
|
|
$len=mb_strlen($char,'UTF-8');
|
|
$key=$char;
|
|
for($loop=$len;$loop>0;$loop--)
|
|
{
|
|
if(isset(static::$entities[$key]))
|
|
{
|
|
# If the string's last character is not a semicolon then trigger a parse
|
|
# error.
|
|
$end=end($charArray);
|
|
if($end!==';')
|
|
{
|
|
static::parseError('semicolon terminator expected',$end);
|
|
|
|
# If in an attribute and the next character matches [A-Za-z0-9=]
|
|
# return nothing.
|
|
if($inattr && preg_match('/[A-Za-z0-9=]/',$key))
|
|
return '&';
|
|
}
|
|
|
|
# Consume the character reference now that we know everything's okay.
|
|
static::consume($loop);
|
|
|
|
# Return the character(s) the character reference references.
|
|
return static::$entities[$key];
|
|
}
|
|
array_pop($charArray);
|
|
$key=implode($charArray);
|
|
}
|
|
|
|
# If characters immediately after the ampersand match '[A-Za-z0-9]+;'
|
|
# trigger an error.
|
|
if(preg_match('/^[A-Za-z0-9]+;/',$char)>0)
|
|
static::parseError('invalid named entity');
|
|
|
|
# Consume nothing and return nothing if no valid named reference was found.
|
|
return '&';
|
|
}
|
|
}
|
|
}
|
|
|
|
# Emits a token to the DOM tree.
|
|
# @param $token The token to emit.
|
|
# @param $mode Mode to emit the token under. Defaults to null.
|
|
protected static function emitToken($token,$mode=null)
|
|
{
|
|
if(static::$debug) {
|
|
echo "token: ";
|
|
var_export($token);
|
|
echo "\n";
|
|
}
|
|
|
|
# Unset and extract the array so tons of comparisons aren't done on slower arrays.
|
|
$type = null;
|
|
$data = null;
|
|
$name = null;
|
|
$attributes = null;
|
|
$selfClosing = null;
|
|
$quirksMode = null;
|
|
$publicID = null;
|
|
$systemID = null;
|
|
|
|
extract($token);
|
|
|
|
if(is_null($mode))
|
|
$mode=static::$mode;
|
|
# Optimization. There's no need to check if it's HTML or foreign content if
|
|
# a mode is given. It'll always be HTML content.
|
|
else
|
|
goto htmlContent;
|
|
|
|
# This looks thoroughly insane, but it's a lot faster than doing it the
|
|
# conventional way.
|
|
$currentNodeNamespace = (static::$currentNode) ? static::$currentNode->namespaceURI : null;
|
|
|
|
if(static::$currentNode===false || $type=='eof' || $currentNodeNamespace==null)
|
|
goto htmlContent;
|
|
else
|
|
{
|
|
if($currentNodeNamespace=='http://www.w3.org/1998/Math/MathML')
|
|
{
|
|
if(((static::$currentNodeName=='mi' || static::$currentNodeName=='mo' || static::$currentNodeName=='mn' || static::$currentNodeName=='ms' || static::$currentNodeName=='mtext') &&
|
|
($type=='character' || ($type=='start tag' && ($name!='mglyph' && $name!='malignmark')))))
|
|
goto htmlContent;
|
|
elseif(static::$currentNodeName=='annotation-xml')
|
|
{
|
|
$currentNodeEncoding=strtolower(static::$currentNode->getAttribute('encoding'));
|
|
if(($type=='start tag' && $name=='svg') || ($currentNodeEncoding=='text/html' || $currentNodeEncoding=='application/xhtml+xml'))
|
|
goto htmlContent;
|
|
}
|
|
}
|
|
elseif($currentNodeNamespace=='http://www.w3.org/2000/svg' && (static::$currentNodeName=='foreignObject' || static::$currentNodeName=='desc' || static::$currentNodeName=='title') &&
|
|
($type=='start tag' || $type=='character'))
|
|
goto htmlContent;
|
|
}
|
|
|
|
# Foreign content. Algorithm goes here if none of the checks above go to
|
|
# htmlContent.
|
|
foreignContent:
|
|
{
|
|
static::$htmlContent=false;
|
|
if($type=='character')
|
|
{
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
if($data!="\t" && $data!="\n" && $data!="\x0c" && $data!="\x0d" && $data!=' ')
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if(($name=='b' || $name=='big' || $name=='blockquote' || $name=='body' || $name=='br' || $name=='center' || $name=='code' || $name=='dd' || $name=='div' || $name=='dl' ||
|
|
$name=='dt' || $name=='em' || $name=='embed' || $name=='h1' || $name=='h2' || $name=='h3' || $name=='h4' || $name=='h5' || $name=='h6' || $name=='head' || $name=='hr' ||
|
|
$name=='i' || $name=='img' || $name=='li' || $name=='listing' || $name=='menu' || $name=='meta' || $name=='nobr' || $name=='ol' || $name=='p' || $name=='pre' ||
|
|
$name=='ruby' || $name=='s' || $name=='small' || $name=='span' || $name=='strong' || $name=='strike' || $name=='sub' || $name=='sup' || $name=='table' || $name=='tt' ||
|
|
$name=='u' || $name=='ul' || $name=='var') || ($name=='font' && (isset($attributes['color']) || isset($attributes['face']) || isset($attributes['size']))))
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
|
|
# Pop an element from the stack of open elements, and then keep popping
|
|
# more elements from the stack of open elements until the current node
|
|
# is a MathML text integration point, an HTML integration point, or an
|
|
# element in the HTML namespace.
|
|
static::stackPop();
|
|
while(true)
|
|
{
|
|
$namespace=static::$currentNodeName->namespaceURI;
|
|
|
|
# HTML namespace.
|
|
if($namespace==null)
|
|
break;
|
|
else
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
if($namespace=='http://www.w3.org/1998/Math/MathML')
|
|
{
|
|
# MathML text integration point.
|
|
if($nodeName=='mi' || $nodeName=='mo' || $nodeName=='mn' || $nodeName=='ms' || $nodeName=='mtext')
|
|
break;
|
|
# HTML integration point.
|
|
elseif($nodeName=='annotation-xml')
|
|
{
|
|
$encoding=strtolower($nodeName->getAttribute('encoding'));
|
|
if($encoding=='text/html' || $encoding=='application/xhtml+xml')
|
|
break;
|
|
}
|
|
}
|
|
# HTML integration point.
|
|
elseif($namespace=='http://www.w3.org/2000/svg' && ($nodeName=='foreignObject' || $nodeName=='desc' || $nodeName=='title'))
|
|
break;
|
|
}
|
|
static::stackPop();
|
|
}
|
|
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
{
|
|
# If the current node is an element in the MathML namespace, adjust
|
|
# MathML attributes for the token. (This fixes the case of MathML
|
|
# attributes that are not all lowercase.)
|
|
if($currentNodeNamespace=='http://www.w3.org/1998/Math/MathML')
|
|
{
|
|
if(isset($attributes['definitionurl']))
|
|
{
|
|
$token['attributes']['definitionURL']=$attributes['definitionurl'];
|
|
unset($token['attributes']['definitionurl']);
|
|
}
|
|
}
|
|
elseif($currentNodeNamespace=='http://www.w3.org/2000/svg')
|
|
{
|
|
# If the current node is an element in the SVG namespace, and the
|
|
# token's tag name is one of the ones in the first column of the
|
|
# following table, change the tag name to the name given in the
|
|
# corresponding cell in the second column. (This fixes the case of SVG
|
|
# elements that are not all lowercase.)
|
|
if(isset(static::$svgElements[$name]))
|
|
{
|
|
$token['name']=static::$svgElements[$name];
|
|
$name=$token['name'];
|
|
}
|
|
# If the current node is an element in the SVG namespace, adjust SVG
|
|
# attributes for the token. (This fixes the case of SVG attributes that
|
|
# are not all lowercase.)
|
|
else
|
|
{
|
|
if(isset($attributes))
|
|
{
|
|
foreach($attributes as $key=>$value)
|
|
{
|
|
if(isset(static::$svgAttributes[$key]))
|
|
{
|
|
$token['attributes'][static::$svgAttributes[$key]]=$value;
|
|
unset($token['attributes'][$key]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
# Optimization. Foreign attributes are adjusted as they're entered into
|
|
# the DOM in this implementation.
|
|
|
|
# Insert a foreign element for the token, in the same namespace as the
|
|
# current node.
|
|
$token['namespace']=$currentNodeNamespace;
|
|
|
|
# If the token has its self-closing flag set, pop the current node off
|
|
# the stack of open elements and acknowledge the token's self-closing
|
|
# flag.
|
|
# Can't acknowledge the self-closing flag.
|
|
static::insertElement($token,(isset($token['selfClosing']) && $token['selfClosing']) ? false : true);
|
|
}
|
|
}
|
|
# Scripting isn't supported in this implementation, so script end tags get
|
|
# processed like any other end tag.
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
# If node is not an element with the same tag name as the token, then
|
|
# this is a parse error.
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Initialize node to be the current node (the bottommost node of the
|
|
# stack).
|
|
$node=static::$currentNode;
|
|
$nodeName=static::$currentNodeName;
|
|
|
|
# Loop:
|
|
while(true)
|
|
{
|
|
# If node's tag name, converted to ASCII lowercase, is the same as the
|
|
# tag name of the token, pop elements from the stack of open elements
|
|
# until node has been popped from the stack, and then jump to the last
|
|
# step of this list of steps.
|
|
# No need to convert to lowercase here?
|
|
if(static::$currentNodeName==$name)
|
|
{
|
|
# This has to be an error in the spec. If the element is popped from the
|
|
# stack of open elements here then when told to process the token
|
|
# according to the rules of the current insertion mode in HTML content
|
|
# then the algorithm will attempt again to pop the element from the stack
|
|
# of open elements.
|
|
|
|
# This implementation goes straight to reprocessing the token.
|
|
|
|
/* while(true)
|
|
{
|
|
$poppedNode=static::$currentNode;
|
|
static::stackPop();
|
|
if($poppedNode===$node)
|
|
goto reprocessToken;
|
|
} */
|
|
|
|
goto reprocessToken;
|
|
}
|
|
|
|
# Set node to the previous entry in the stack of open elements.
|
|
$node=static::$stack[static::$stackSize-2];
|
|
|
|
# If node is not an element in the HTML namespace, return to the step
|
|
# labeled loop.
|
|
if(!is_null($node->namespaceURI))
|
|
continue;
|
|
|
|
# Otherwise, process the token according to the rules given in the
|
|
# section corresponding to the current insertion mode in HTML content.
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
|
|
# Return here so the code below doesn't get processed.
|
|
return true;
|
|
}
|
|
|
|
# Used for reprocessing tokens. Using a goto is a whole lot faster and uses
|
|
# a lot less memory than recursively calling this method over and over again.
|
|
# This won't ever be executed unless told to go to this point in the program.
|
|
reprocessToken:
|
|
$mode=static::$mode;
|
|
|
|
htmlContent:
|
|
{
|
|
static::$htmlContent=true;
|
|
|
|
# HTML content.
|
|
switch($mode)
|
|
{
|
|
case 'initial':
|
|
{
|
|
if($type=='character' && ($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' '))
|
|
continue;
|
|
# Too much work involved to allow for comments before the DOCTYPE using the DOM, so they're ignored.
|
|
elseif($type=='comment')
|
|
continue;
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
if($name!='html' || isset($publicID) || (isset($systemID) && $systemID=='about:legacy-compat') ||
|
|
!($name=='html' || $publicID=='-/W3C//DTD HTML 4.0//EN' || (!isset($systemID) || $systemID=='http://www.w3.org/TR/REC-html40/strict.dtd')) ||
|
|
!($name=='html' || $publicID=='-//W3C//DTD HTML 4.01//EN' || (!isset($systemID) || $systemID=='http://www.w3.org/TR/html4/strict.dtd')) ||
|
|
!($name=='html' || $publicID=='-//W3C//DTD XHTML 1.0 Strict//EN' || (!isset($systemID) || $systemID=='http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd')) ||
|
|
!($name=='html' || $publicID=='-//W3C//DTD XHTML 1.1//EN' || (!isset($systemID) || $systemID=='http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd')))
|
|
static::parseError('invalid doctype');
|
|
|
|
# Append a DocumentType node to the Document node.
|
|
# PHP's DOM can't just do that, so a document is created with the
|
|
# specified DOCTYPE.
|
|
# Deviation: PHP's DOMImplementation::createDocumentType() method stupidly
|
|
# cannot accept an empty qualified name, so if it is missing it is replaced
|
|
# with 'html'.
|
|
$implementation = new DOMImplementation();
|
|
static::$DOM=$implementation->createDocument(null,null,$implementation->createDocumentType((isset($name)) ? $name : 'html', $publicID, $systemID));
|
|
static::$currentNode=static::$DOM;
|
|
|
|
# For case insensitive comparison.
|
|
$publicID=strtolower($publicID);
|
|
|
|
if($quirksMode===true || $name!='html' ||
|
|
strpos($publicID,'+//silmaril//dtd html pro v0r11 19970101//')===0 ||
|
|
strpos($publicID,'-//advasoft ltd//dtd html 3.0 aswedit + extensions//')===0 ||
|
|
strpos($publicID,'-//as//dtd html 3.0 aswedit + extensions//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0 level 1//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0 level 2//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0 strict level 1//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0 strict level 2//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0 strict//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.0//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 2.1e//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 3.0//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 3.2 final//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 3.2//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html 3//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html level 0//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html level 1//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html level 2//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html level 3//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html strict level 0//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html strict level 1//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html strict level 2//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html strict level 3//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html strict//')===0 ||
|
|
strpos($publicID,'-//ietf//dtd html//')===0 ||
|
|
strpos($publicID,'-//metrius//dtd metrius presentational//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 2.0 html strict//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 2.0 html//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 2.0 tables//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 3.0 html strict//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 3.0 html//')===0 ||
|
|
strpos($publicID,'-//microsoft//dtd internet explorer 3.0 tables//')===0 ||
|
|
strpos($publicID,'-//netscape comm. corp.//dtd html//')===0 ||
|
|
strpos($publicID,'-//netscape comm. corp.//dtd strict html//')===0 ||
|
|
strpos($publicID,'-//o\'reilly and associates//dtd html 2.0//')===0 ||
|
|
strpos($publicID,'-//o\'reilly and associates//dtd html extended 1.0//')===0 ||
|
|
strpos($publicID,'-//o\'reilly and associates//dtd html extended relaxed 1.0//')===0 ||
|
|
strpos($publicID,'-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//')===0 ||
|
|
strpos($publicID,'-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//')===0 ||
|
|
strpos($publicID,'-//spyglass//dtd html 2.0 extended//')===0 ||
|
|
strpos($publicID,'-//sq//dtd html 2.0 hotmetal + extensions//')===0 ||
|
|
strpos($publicID,'-//sun microsystems corp.//dtd hotjava html//')===0 ||
|
|
strpos($publicID,'-//sun microsystems corp.//dtd hotjava strict html//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 3 1995-03-24//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 3.2 draft//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 3.2 final//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 3.2//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 3.2s draft//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 4.0 frameset//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html 4.0 transitional//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html experimental 19960712//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd html experimental 970421//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd w3 html//')===0 ||
|
|
strpos($publicID,'-//w3o//dtd w3 html 3.0//')===0 ||
|
|
strpos($publicID,'-//webtechs//dtd mozilla html 2.0//')===0 ||
|
|
strpos($publicID,'-//webtechs//dtd mozilla html//')===0 ||
|
|
$publicID=='-//w3o//dtd w3 html strict 3.0//en//' ||
|
|
$publicID=='-/w3c/dtd html 4.0 transitional/en' ||
|
|
$publicID=='html' ||
|
|
$systemID=='http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' ||
|
|
(!isset($systemID) && strpos($publicID,'-//w3c//dtd html 4.01 frameset//')===0) ||
|
|
(!isset($systemID) && strpos($publicID,'-//w3c//dtd html 4.01 transitional//')===0))
|
|
static::$quirksMode=true;
|
|
elseif(strpos($publicID,'-//w3c//dtd xhtml 1.0 frameset//')===0 ||
|
|
strpos($publicID,'-//w3c//dtd xhtml 1.0 transitional//')===0 ||
|
|
(isset($systemID) && strpos($publicID,'-//w3c//dtd html 4.01 frameset//')===0) ||
|
|
(isset($systemID) && strpos($publicID,'-//w3c//dtd html 4.01 transitional//')===0))
|
|
static::$quirksMode='limited';
|
|
|
|
static::$mode='before html';
|
|
}
|
|
else
|
|
{
|
|
initialAnythingElse:
|
|
# CHECK THIS: Don't think there's an iframe srcdoc document to worry about here.
|
|
static::parseError('doctype expected '.$type,$name);
|
|
|
|
static::$quirksMode=true;
|
|
# Create empty DOM Document.
|
|
static::$DOM=DOMImplementation::createDocument();
|
|
static::$currentNode=static::$DOM;
|
|
|
|
# Reprocess the token in the 'before html' insertion mode.
|
|
static::$mode='before html';
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'before html':
|
|
{
|
|
if($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
{
|
|
# Deviation. This implementation preserves whitespace here.
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
}
|
|
else
|
|
{
|
|
beforeHtmlAnythingElse:
|
|
# Insert an html element.
|
|
static::insertElement(array('type'=>'start tag',
|
|
'name'=>'html'));
|
|
static::$mode='before head';
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
{
|
|
$ook=static::$DOM;
|
|
# Insert an html node into the DOM document.
|
|
static::insertElement($token);
|
|
static::$mode='before head';
|
|
}
|
|
else
|
|
goto beforeHtmlAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='head' || $name=='body' || $name=='html' || $name=='br')
|
|
goto beforeHtmlAnythingElse;
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,'html');
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
goto beforeHtmlAnythingElse;
|
|
}
|
|
break;
|
|
|
|
case 'before head':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
{
|
|
# Deviation. This implementation preserves whitespace here.
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
}
|
|
else
|
|
{
|
|
beforeHeadAnythingElse:
|
|
# Act as if a start tag token with the tag name "head" and no attributes
|
|
# had been seen, then reprocess the current token.
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'head'));
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='head')
|
|
{
|
|
# Insert a head element.
|
|
static::$head=static::insertElement($token);
|
|
static::$mode='in head';
|
|
}
|
|
else
|
|
goto beforeHeadAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='head' || $name=='body' || $name=='html' || $name=='br')
|
|
goto beforeHeadAnythingElse;
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,'head');
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
goto beforeHeadAnythingElse;
|
|
}
|
|
break;
|
|
|
|
case 'in head':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
# Act as if an end tag token with the tag name "head" had been seen, and
|
|
# reprocess the current token.
|
|
inHeadAnythingElse:
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'head'));
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype','head');
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif(in_array($name,static::$headElements))
|
|
{
|
|
# Insert the element and don't add it to the end of the stack.
|
|
static::insertElement($token,false);
|
|
}
|
|
elseif($name=='meta')
|
|
{
|
|
# Deviation. Spec states to grab the character encoding here under
|
|
# certain circumstances, convert everything to it, and start using
|
|
# it. This implementation will only ever support UTF-8, so it will
|
|
# make the metadata here reflect that.
|
|
$attributes['charset']='UTF-8';
|
|
unset($attributes['http-equiv']);
|
|
|
|
# Insert the element and don't add it to the end of the stack.
|
|
static::insertElement($token,false);
|
|
}
|
|
elseif(in_array($name,static::$rcdataHeadElements))
|
|
{
|
|
# Generic RCDATA element parsing algorithm.
|
|
static::insertElement($token);
|
|
static::$state='RCDATA';
|
|
static::$oMode=static::$mode;
|
|
static::$mode='text';
|
|
}
|
|
# Since there's no scripting flag scripting is disabled.
|
|
# As it is disabled the "noscript" enabled scripting flag item isn't
|
|
# present.
|
|
elseif(in_array($name,static::$rawtextHeadElements))
|
|
{
|
|
# Generic raw text element parsing algorithm.
|
|
genericRawTextElementParsingAlgorithm:
|
|
static::insertElement($token);
|
|
static::$state='RAWTEXT';
|
|
static::$oMode=static::$mode;
|
|
static::$mode='text';
|
|
}
|
|
# Since there's no scripting flag there will be no checking for it.
|
|
elseif($name=='noscript')
|
|
{
|
|
static::insertElement($token);
|
|
static::$mode='in head noscript';
|
|
}
|
|
elseif($name=='script')
|
|
{
|
|
static::insertElement($token);
|
|
static::$state='script data';
|
|
static::$oMode=static::$mode;
|
|
static::$mode='text';
|
|
}
|
|
elseif($name=='head')
|
|
static::parseError('unexpected start tag','head','head');
|
|
else
|
|
goto inHeadAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='head')
|
|
{
|
|
static::stackPop();
|
|
static::$mode='after head';
|
|
}
|
|
elseif($name=='body' || $name=='html' || $name=='br')
|
|
goto inHeadAnythingElse;
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
goto inHeadAnythingElse;
|
|
}
|
|
break;
|
|
|
|
case 'in head noscript':
|
|
{
|
|
if($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype','noscript');
|
|
return false;
|
|
}
|
|
elseif($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,'noscript');
|
|
|
|
# Act as if an end tag with the tag name "noscript" had been seen and
|
|
# reprocess the current token.
|
|
inHeadNoscriptAnythingElse:
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'noscript'));
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='basefont' || $name=='bgsound' || $name=='link' || $name=='meta' || $name=='noframes' || $name=='style')
|
|
static::emitToken($token,'in head');
|
|
elseif($name=='head' || $name=='noscript')
|
|
static::parseError('unexpected start tag',$name,'noscript');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,'noscript');
|
|
goto inHeadNoscriptAnythingElse;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='noscript')
|
|
{
|
|
static::stackPop();
|
|
static::$mode='in head';
|
|
}
|
|
elseif($name=='br')
|
|
{
|
|
static::parseError('unexpected end tag','br','noscript');
|
|
goto inHeadNoscriptAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
static::parseError('unexpected eof',static::$currentNodeName);
|
|
goto inHeadNoscriptAnythingElse;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after head':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
# Act as if a start tag token with the tag name "body" and no attributes
|
|
# had been seen.
|
|
afterHeadAnythingElse:
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'body'));
|
|
static::$framesetOk=true;
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected DOCTYPE',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='body')
|
|
{
|
|
static::insertElement($token);
|
|
static::$framesetOk=false;
|
|
static::$mode='in body';
|
|
}
|
|
elseif($name=='frameset')
|
|
{
|
|
static::insertElement($token);
|
|
static::$mode='in frameset';
|
|
}
|
|
elseif($name=='meta' || $name=='noframes' || $name=='script' || $name=='style' || $name=='title' || in_array($name,static::$headElements))
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
static::stackPush(static::$head);
|
|
$headPos=static::$stackSize-1;
|
|
static::emitToken($token,'in head');
|
|
|
|
# Remove the node pointed to by the head element pointer from the stack
|
|
# of open elements.
|
|
static::stackSlice($headPos);
|
|
}
|
|
elseif($name=='head')
|
|
static::parseError('unexpected start tag','head',static::$currentNodeName);
|
|
else
|
|
goto afterHeadAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='body' || $name=='html' || $name=='br')
|
|
goto afterHeadAnythingElse;
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
goto afterHeadAnythingElse;
|
|
}
|
|
break;
|
|
|
|
case 'in body':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
static::activeReconstruct();
|
|
$char=static::$DOM->createTextNode($data);
|
|
|
|
# Foster parenting stuff here for the purpose of processing tables. Described
|
|
# first in §13.2.5.4.9 under "Anything Else". This implementation uses a flag
|
|
# to determine if foster parenting is necessary.
|
|
# The "in table text" insertion mode will sometimes send characters this way.
|
|
if(static::$fosterParenting && (static::$currentNodeName=='table' || static::$currentNodeName=='tbody' ||
|
|
static::$currentNodeName=='tfoot' || static::$currentNodeName=='thead' ||
|
|
static::$currentNodeName=='tr'))
|
|
static::fosterParent($char);
|
|
else
|
|
static::$currentNode->appendChild($char);
|
|
|
|
if($data!="\t" && $data!="\n" && $data!="\x0c" && $data!="\x0d" && $data!=' ')
|
|
$framesetOk=false;
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
# For each attribute on the token check to see if the attribute is
|
|
# already present on the top element of the stack of open elements.
|
|
# If it is not, add the attribute and its corresponding value to
|
|
# that element.
|
|
$topElement=static::$stack[0];
|
|
if(isset($attributes))
|
|
{
|
|
foreach($attributes as $key=>$value)
|
|
{
|
|
if($topElement->getAttribute($key)=='')
|
|
$topElement->setAttribute($key,(is_null($value)) ? $key : $value);
|
|
}
|
|
}
|
|
}
|
|
elseif($name=='base' || $name=='basefont' || $name=='bgsound' || $name=='command' || $name=='link' ||
|
|
$name=='meta' || $name=='noframes' || $name=='script' || $name=='style' || $name=='title')
|
|
static::emitToken($token,'in head');
|
|
elseif($name=='body')
|
|
{
|
|
static::parseError('unexpected start tag','body',static::$currentNodeName);
|
|
|
|
$secondElement=static::$stack[1];
|
|
if(static::$fragment===true)
|
|
{
|
|
# If the second element on the stack of open elements is not a body
|
|
# element, or, if the stack of open elements has only one node on it,
|
|
# then ignore the token. (fragment case)
|
|
if($secondElement->nodeName!='body' || static::$stackSize==1)
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
static::$framesetOk=false;
|
|
|
|
# For each attribute on the token, check to see if the attribute is
|
|
# already present on the body element (the second element) on the
|
|
# stack of open elements, and if it is not, add the attribute and its
|
|
# corresponding value to that element.
|
|
if(isset($attributes))
|
|
{
|
|
foreach($attributes as $key=>$value)
|
|
{
|
|
if($secondElement->getAttribute($key)=='')
|
|
$secondElement->setAttribute($key,(is_null($value)) ? $key : $value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
elseif($name=='frameset')
|
|
{
|
|
static::parseError('unexpected start tag','frameset',static::$currentNodeName);
|
|
|
|
if(static::$fragment===true)
|
|
{
|
|
# If the second element on the stack of open elements is not a body
|
|
# element, or, if the stack of open elements has only one node on it,
|
|
# then ignore the token. (fragment case)
|
|
# If the frameset-ok flag is set to "not ok", ignore the token.
|
|
$secondElement=static::$stack[1];
|
|
if(!$framesetOk || $secondElement->nodeName!='body' || static::$stackSize==1)
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
# Remove the second element on the stack of open elements from its
|
|
# parent node, if it has one.
|
|
$secondElement->parentNode->removeChild($secondElement);
|
|
|
|
# Pop all the nodes from the bottom of the stack of open elements,
|
|
# from the current node up to, but not including, the root html
|
|
# element.
|
|
$firstElement=static::$stack[0];
|
|
static::$stack=array($firstElement);
|
|
static::$currentNode=$firstElement;
|
|
static::$currentNodeName=$firstElement->nodeName;
|
|
|
|
# Insert an HTML element for the token.
|
|
static::insertElement($token);
|
|
static::$mode='in frameset';
|
|
}
|
|
}
|
|
elseif($name=='address' || $name=='article' || $name=='aside' || $name=='blockquote' ||
|
|
$name=='center' || $name=='details' || $name=='dialog' || $name=='dir' || $name=='div' ||
|
|
$name=='dl' || $name=='fieldset' || $name=='figcaption' || $name=='figure' ||
|
|
$name=='footer' || $name=='header' || $name=='hgroup' || $name=='main' || $name=='menu' || $name=='nav' ||
|
|
$name=='ol' || $name=='p' || $name=='section' || $name=='summary' || $name=='ul')
|
|
{
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='h1' || $name=='h2' || $name=='h3' || $name=='h4' || $name=='h5' || $name=='h6')
|
|
{
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
if(static::$currentNodeName=='h1' || static::$currentNodeName=='h2' || static::$currentNodeName=='h3' ||
|
|
static::$currentNodeName=='h4' || static::$currentNodeName=='h5' || static::$currentNodeName=='h6')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
static::stackPop();
|
|
}
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='pre' || $name=='listing')
|
|
{
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token);
|
|
|
|
# If the next token is a U+000A LINE FEED (LF) character token, then
|
|
# ignore that token and move on to the next one. (Newlines at the
|
|
# start of pre blocks are ignored as an authoring convenience.)
|
|
if(static::peek()=="\n")
|
|
static::consume();
|
|
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($name=='form')
|
|
{
|
|
if(static::$form!=null)
|
|
{
|
|
static::parseError('unexpected start tag','form','form');
|
|
return false;
|
|
}
|
|
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::$form=static::insertElement($token);
|
|
}
|
|
elseif($name=='li')
|
|
{
|
|
static::$framesetOk=false;
|
|
|
|
$node=static::$currentNode;
|
|
$nodeName=static::$currentNodeName;
|
|
$key=static::$stackSize-1;
|
|
|
|
while(true)
|
|
{
|
|
# If node is an li element, then act as if an end tag with the tag name
|
|
# "li" had been seen, then jump to the last step.
|
|
if($nodeName=='li')
|
|
{
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'li'));
|
|
break;
|
|
}
|
|
# If node is in the special category, but is not an address, div, or p
|
|
# element, then jump to the last step.
|
|
if(($nodeName!='address' || $nodeName!='div' || $nodeName!='p') && static::isSpecial($node))
|
|
break;
|
|
# Otherwise, set node to the previous entry in the stack of open
|
|
# elements and return to the step labeled loop.
|
|
else
|
|
{
|
|
$key--;
|
|
$node=static::$stack[$key];
|
|
$nodeName=$node->nodeName;
|
|
}
|
|
}
|
|
|
|
# This is the last step.
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='dd' || $name=='dt')
|
|
{
|
|
static::$framesetOk=false;
|
|
|
|
$node=static::$currentNode;
|
|
$nodeName=static::$currentNodeName;
|
|
$key=static::$stackSize-1;
|
|
|
|
while(true)
|
|
{
|
|
# If node is a dd or dt element, then act as if an end tag with the
|
|
# same tag name as node had been seen, then jump to the last step.
|
|
if($nodeName=='dd' || $nodeName=='dt')
|
|
{
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>$nodeName));
|
|
|
|
break;
|
|
}
|
|
# If node is in the special category, but is not an address, div, or p
|
|
# element, then jump to the last step.
|
|
if(($nodeName!='address' || $nodeName!='div' || $nodeName!='p') && static::isSpecial($node))
|
|
break;
|
|
# Otherwise, set node to the previous entry in the stack of open
|
|
# elements and return to the step labeled loop.
|
|
else
|
|
{
|
|
$key--;
|
|
$node=static::$stack[$key];
|
|
$nodeName=$node->nodeName;
|
|
}
|
|
}
|
|
|
|
# This is the last step.
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='plaintext')
|
|
{
|
|
# If the stack of open elements has a p element in button scope then act
|
|
# as if an end tag with the tag name "p" had been seen.
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token);
|
|
static::$state='PLAINTEXT';
|
|
}
|
|
elseif($name=='button')
|
|
{
|
|
# If the stack of open elements has a button element in scope, then this
|
|
# is a parse error; act as if an end tag with the tag name "button" had
|
|
# been seen, then reprocess the token.
|
|
if(static::inScope('button'))
|
|
{
|
|
static::parseError('unexpected start tag','button','button');
|
|
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'button'));
|
|
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
{
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
static::$framesetOk=false;
|
|
}
|
|
}
|
|
elseif($name=='a')
|
|
{
|
|
# If the list of active formatting elements contains an element whose
|
|
# tag name is "a" between the end of the list and the last marker on the
|
|
# list (or the start of the list if there is no marker on the list),
|
|
# then this is a parse error; act as if an end tag with the tag name "a"
|
|
# had been seen, then remove that element from the list of active
|
|
# formatting elements and the stack of open elements if the end tag
|
|
# didn't already remove it (it might not have if the element is not in
|
|
# table scope).
|
|
for($loop=static::$activeSize;$loop<0;$loop--)
|
|
{
|
|
$current=static::$active[$loop];
|
|
if(is_string($current))
|
|
break;
|
|
elseif($current->nodeName=='a')
|
|
{
|
|
static::parseError('unexpected start tag','a','a');
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'a'));
|
|
|
|
static::activeSplice($loop);
|
|
|
|
$stackPos=array_search($current,static::$stack,true);
|
|
if($stackPos!==false)
|
|
static::stackSplice($stackPos,1);
|
|
}
|
|
}
|
|
|
|
static::activeReconstruct();
|
|
static::activePush(static::insertElement($token),$token);
|
|
}
|
|
elseif($name=='b' || $name=='big' || $name=='code' || $name=='em' || $name=='font' || $name=='i' ||
|
|
$name=='s' || $name=='small' || $name=='strike' || $name=='strong' || $name=='tt' ||
|
|
$name=='u')
|
|
{
|
|
static::activeReconstruct();
|
|
|
|
static::activePush(static::insertElement($token),$token);
|
|
}
|
|
elseif($name=='nobr')
|
|
{
|
|
static::activeReconstruct();
|
|
|
|
if(static::inScope('nobr'))
|
|
{
|
|
static::parseError('unexpected start tag','nobr','nobr');
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'nobr'));
|
|
static::activeReconstruct();
|
|
}
|
|
|
|
static::activePush(static::insertElement($token),$token);
|
|
}
|
|
elseif($name=='applet' || $name=='marquee' || $name=='object')
|
|
{
|
|
static::activeReconstruct();
|
|
|
|
static::insertElement($token);
|
|
static::activePush($name);
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($name=='table')
|
|
{
|
|
if(!static::$quirksMode && static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
static::insertElement($token);
|
|
static::$framesetOk=false;
|
|
static::$mode='in table';
|
|
}
|
|
elseif($name=='area' || $name=='br' || $name=='embed' || $name=='img' || $name=='keygen' || $name=='wbr')
|
|
{
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
# Immediately pop the current node off the stack of open elements.
|
|
static::stackPop();
|
|
# Can't acknowledge the self-closing flag.
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($name=='input')
|
|
{
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
# Immediately pop the current node off the stack of open elements.
|
|
static::stackPop();
|
|
# Can't acknowledge the self-closing flag.
|
|
|
|
# If the token does not have an attribute with the name "type", or if it
|
|
# does, but that attribute's value is not an ASCII case-insensitive
|
|
# match for the string "hidden", then: set the frameset-ok flag to
|
|
# "not ok".
|
|
if(strtolower($attributes['type'])!='hidden')
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($name=='param' || $name=='source' || $name=='track')
|
|
{
|
|
static::insertElement($token);
|
|
# Immediately pop the current node off the stack of open elements.
|
|
static::stackPop();
|
|
# Can't acknowledge the self-closing flag.
|
|
}
|
|
elseif($name=='hr')
|
|
{
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::insertElement($token,false);
|
|
|
|
# Can't acknowledge the self-closing flag.
|
|
|
|
static::$framesetOk=false;
|
|
}
|
|
elseif($name=='image')
|
|
{
|
|
static::parseError('invalid start tag','image','img');
|
|
$token['name']='img';
|
|
static::emitToken($token);
|
|
}
|
|
elseif($name=='isindex')
|
|
{
|
|
static::parseError('invalid start tag','isindex','form');
|
|
|
|
if(!is_null(static::$form))
|
|
return false;
|
|
|
|
# Can't acknowledge the self-closing flag.
|
|
|
|
# Act as if a start tag token with the tag name "form" had been seen.
|
|
# If the token has an attribute called "action", set the action
|
|
# attribute on the resulting form element to the value of the
|
|
# "action" attribute of the token.
|
|
$temp=array('type'=>'start tag',
|
|
'name'=>'form');
|
|
if(isset($attributes['action']))
|
|
$temp['action']=$attributes['action'];
|
|
static::emitToken($temp);
|
|
|
|
# Act as if a start tag token with the tag name "hr" had been seen.
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'hr'));
|
|
|
|
# Act as if a start tag token with the tag name "label" had been seen.
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'label'));
|
|
|
|
# Act as if a stream of character tokens had been seen.
|
|
# If the token has an attribute with the name "prompt", then the first
|
|
# stream of characters must be the same string as given in that
|
|
# attribute, and the second stream of characters must be empty.
|
|
# Otherwise, the two streams of character tokens together should,
|
|
# together with the input element, express the equivalent of "This is a
|
|
# searchable index. Enter search keywords: (input field)" in the user's
|
|
# preferred language.
|
|
if(isset($attributes['prompt']))
|
|
{
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$attributes['prompt']));
|
|
$temp='';
|
|
}
|
|
else
|
|
{
|
|
$temp='This is a searchable index. Enter search keywords:';
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$temp));
|
|
}
|
|
|
|
# Act as if a start tag token with the tag name "input" had been seen,
|
|
# with all the attributes from the "isindex" token except "name",
|
|
# "action", and "prompt". Set the name attribute of the resulting input
|
|
# element to the value "isindex".
|
|
unset($attributes['action'],$attributes['prompt']);
|
|
$attributes['name']='isindex';
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'input',
|
|
'attributes'=>$attributes));
|
|
|
|
# Act as if a stream of character tokens had been seen.
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$temp));
|
|
|
|
# Act as if an end tag token with the tag name "label" had been seen.
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'label'));
|
|
|
|
# Act as if a start tag token with the tag name "hr" had been seen.
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'hr'));
|
|
|
|
# Act as if an end tag token with the tag name "form" had been seen.
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'form'));
|
|
}
|
|
elseif($name=='textarea')
|
|
{
|
|
static::insertElement($token);
|
|
|
|
# If the next token is a U+000A LINE FEED (LF) character token, then
|
|
# ignore that token and move on to the next one. (Newlines at the
|
|
# start of textarea elements are ignored as an authoring
|
|
# convenience.)
|
|
if(static::peek()=="\n")
|
|
static::consume();
|
|
|
|
static::$state='RCDATA';
|
|
static::$oMode=static::$mode;
|
|
static::$framesetOk=false;
|
|
static::$mode='text';
|
|
}
|
|
elseif($name=='xmp')
|
|
{
|
|
if(static::inScope('p','button'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'p'));
|
|
|
|
static::activeReconstruct();
|
|
static::$framesetOk=false;
|
|
|
|
# Follow the generic raw text element parsing algorithm.
|
|
goto genericRawTextElementParsingAlgorithm;
|
|
}
|
|
elseif($name=='iframe')
|
|
{
|
|
static::$framesetOk=false;
|
|
|
|
# Follow the generic raw text element parsing algorithm.
|
|
goto genericRawTextElementParsingAlgorithm;
|
|
}
|
|
# Just noembed because there's no scripting in this implementation.
|
|
# Follow the generic raw text element parsing algorithm.
|
|
elseif($name=='noembed')
|
|
goto genericRawTextElementParsingAlgorithm;
|
|
elseif($name=='select')
|
|
{
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
static::$framesetOk=false;
|
|
|
|
$mode=static::$mode;
|
|
static::$mode=($mode=='in table' || $mode=='in caption' || $mode=='in table body' || $mode=='in row' || $mode=='in cell') ? 'in select in table' : 'in select';
|
|
}
|
|
elseif($name=='optgroup' || $name=='option')
|
|
{
|
|
if(static::$currentNodeName=='option')
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'option'));
|
|
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='rp' || $name=='rt')
|
|
{
|
|
if(static::inScope('ruby'))
|
|
{
|
|
static::generateImpliedEndTags();
|
|
if(static::$currentNodeName!='ruby')
|
|
static::parseError('unexpected start tag',$name,'ruby');
|
|
}
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='math')
|
|
{
|
|
static::activeReconstruct();
|
|
|
|
# Adjust MathML attributes for the token. (This fixes the case of MathML
|
|
# attributes that are not all lowercase.)
|
|
if(isset($attributes['definitionurl']))
|
|
{
|
|
$token['attributes']['definitionURL']=$attributes['definitionurl'];
|
|
unset($token['attributes']['definitionurl']);
|
|
}
|
|
|
|
$token['namespace']='http://www.w3.org/1998/Math/MathML';
|
|
static::insertElement($token,($token['selfClosing']) ? false : true);
|
|
|
|
# Can't acknowledge the self-closing flag.
|
|
}
|
|
elseif($name=='svg')
|
|
{
|
|
static::activeReconstruct();
|
|
|
|
# Adjust SVG attributes for the token. (This fixes the case of SVG
|
|
# attributes that are not all lowercase.)
|
|
if(isset($attributes))
|
|
{
|
|
foreach($attributes as $key=>$value)
|
|
{
|
|
if(isset(static::$svgAttributes[$key]))
|
|
{
|
|
$token['attributes'][static::$svgAttributes[$key]]=$value;
|
|
unset($token['attributes'][$key]);
|
|
}
|
|
}
|
|
}
|
|
|
|
$token['namespace']='http://www.w3.org/2000/svg';
|
|
static::insertElement($token,(isset($token['selfClosing'])) ? false : true);
|
|
|
|
# Can't acknowledge the self-closing flag.
|
|
}
|
|
elseif($name=='caption' || $name=='col' || $name=='colgroup' || $name=='frame' || $name=='head' ||
|
|
$name=='tbody' || $name=='td' || $name=='tfoot' || $name=='th' || $name=='thead' || $name=='tr')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
static::activeReconstruct();
|
|
static::insertElement($token);
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='body')
|
|
{
|
|
if(!static::inScope('body'))
|
|
{
|
|
static::parseError('unexpected end tag','body',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# The first two elements in the stack should always at this point be
|
|
# the html and body elements respectively, so there's no point in
|
|
# having the loop check them. So, the loop ends at 1.
|
|
for($loop=static::$stackSize-1;$loop>1;$loop--)
|
|
{
|
|
$node=static::$stack[$loop];
|
|
|
|
$nodeName=static::$stack[$loop]->nodeName;
|
|
if($nodeName!='dd' && $nodeName!='dt' && $nodeName!='li' && $nodeName!='optgroup' && $nodeName!='option' &&
|
|
$nodeName!='p' && $nodeName!='rp' && $nodeName!='rt' && $nodeName!='tbody' && $nodeName!='td' &&
|
|
$nodeName!='tfoot' && $nodeName!='th' && $nodeName!='thead' && $nodeName!='tr' && $nodeName!='body' &&
|
|
$nodeName!='html')
|
|
static::parseError('unexpected end tag','body',$nodeName);
|
|
}
|
|
|
|
static::$mode='after body';
|
|
}
|
|
elseif($name=='html')
|
|
{
|
|
# Act as if an end tag with the name "body" had been seen.
|
|
# If that token wasn't ignored, reprocess the current token.
|
|
if(static::emitToken(array('type'=>'end tag',
|
|
'name'=>'body'))!==false)
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='address' || $name=='article' || $name=='aside' || $name=='blockquote' || $name=='button' ||
|
|
$name=='center' || $name=='details' || $name=='dialog' || $name=='dir' || $name=='div' || $name=='dl' ||
|
|
$name=='fieldset' || $name=='figcaption' || $name=='figure' || $name=='footer' || $name=='header' ||
|
|
$name=='hgroup' || $name=='listing' || $name=='main' || $name=='menu' || $name=='nav' || $name=='ol' ||
|
|
$name=='pre' || $name=='section' || $name=='summary' || $name=='ul')
|
|
{
|
|
# If the stack of open elements does not have an element in scope with
|
|
# the same tag name as that of the token, then this is a parse error;
|
|
# ignore the token.
|
|
if(!static::inScope($name))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
|
|
# If the current node is not an element with the same tag name as that
|
|
# of the token, then this is a parse error.
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element with
|
|
# the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName==$name)
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='form')
|
|
{
|
|
$node=static::$form;
|
|
static::$form=null;
|
|
|
|
if($node==null || static::inScope($node)===false)
|
|
{
|
|
static::parseError('unexpected end tag','form',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
if(static::$currentNode!=$node)
|
|
static::parseError('unexpected end tag','form',static::$currentNodeName);
|
|
|
|
# Remove node from the stack of open elements.
|
|
for($loop=static::$stackSize-1;$loop>=$stackPos;$loop--)
|
|
{
|
|
if(static::$stack[$loop]->isSameNode($node))
|
|
{
|
|
static::stackSplice($loop);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
elseif($name=='p')
|
|
{
|
|
# If the stack of open elements does not have an element in button scope
|
|
# with the same tag name as that of the token, then this is a parse
|
|
# error; act as if a start tag with the tag name "p" had been seen, then
|
|
# reprocess the current token.
|
|
if(!static::inScope('p','button'))
|
|
{
|
|
static::parseError('unexpected end tag','p',static::$currentNodeName);
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'p'));
|
|
goto reprocessToken;
|
|
}
|
|
|
|
# Generate implied end tags, except for elements with the same tag name
|
|
# as the token.
|
|
static::generateImpliedEndTags('p');
|
|
|
|
# If the current node is not an element with the same tag name as that
|
|
# of the token, then this is a parse error.
|
|
if(static::$currentNodeName!='p')
|
|
static::parseError('unexpected end tag','p',static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element with
|
|
# the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='p')
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='li')
|
|
{
|
|
# If the stack of open elements does not have an element in list item
|
|
# scope with the same tag name as that of the token, then this is a
|
|
# parse error; ignore the token.
|
|
if(!static::inScope('li','list item'))
|
|
{
|
|
static::parseError('unexpected end tag','li',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Generate implied end tags, except for elements with the same tag name
|
|
# as the token.
|
|
static::generateImpliedEndTags('li');
|
|
|
|
# If the current node is not an element with the same tag name as that
|
|
# of the token, then this is a parse error.
|
|
if(static::$currentNodeName!='li')
|
|
static::parseError('unexpected end tag','li',static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element with
|
|
# the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='li')
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='dd' || $name=='dt')
|
|
{
|
|
# If the stack of open elements does not have an element in scope with
|
|
# the same tag name as that of the token, then this is a parse error;
|
|
# ignore the token.
|
|
if(!static::inScope($name))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Generate implied end tags, except for elements with the same tag name
|
|
# as the token.
|
|
static::generateImpliedEndTags($name);
|
|
|
|
# If the current node is not an element with the same tag name as that
|
|
# of the token, then this is a parse error.
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element with
|
|
# the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName==$name)
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='h1' || $name=='h2' || $name=='h3' || $name=='h4' || $name=='h5' || $name=='h6')
|
|
{
|
|
if(!static::inScope('h1') && !static::inScope('h2') && !static::inScope('h3') && !static::inScope('h4') && !static::inScope('h5') && !static::inScope('h6'))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element whose
|
|
# tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6" has been
|
|
# popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='h1' || $nodeName=='h2' || $nodeName=='h3' || $nodeName=='h4' || $nodeName=='h5' || $nodeName=='h6')
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='a' || $name=='b' || $name=='big' || $name=='code' || $name=='em' || $name=='font' ||
|
|
$name=='i' || $name=='nobr' || $name=='s' || $name=='small' || $name=='strike' ||
|
|
$name=='strong' || $name=='tt' || $name=='u')
|
|
{
|
|
# Let outer loop counter be zero.
|
|
# If outer loop counter is greater than or equal to eight, then abort
|
|
# these steps.
|
|
for($loop=0;$loop<8;$loop++)
|
|
{
|
|
# Let the formatting element be the last element in the list of active
|
|
# formatting elements that:
|
|
# * is between the end of the list and the last scope marker in the
|
|
# list, if any, or the start of the list otherwise, and
|
|
# * has the same tag name as the token.
|
|
for($activePos=static::$activeSize-1;$activePos>=0;$activePos--)
|
|
{
|
|
$formattingElement=static::$active[$activePos];
|
|
if(is_string($formattingElement))
|
|
{
|
|
$formattingElement=(is_object(static::$active[0]) && static::$active[0]->nodeName==$name) ? static::$active[0] : null;
|
|
break;
|
|
}
|
|
elseif($formattingElement->nodeName==$name)
|
|
break;
|
|
else
|
|
$formattingElement=null;
|
|
}
|
|
|
|
# If there is no such node, then abort these steps and instead act as
|
|
# described in the "any other end tag" entry below.
|
|
if(is_null($formattingElement))
|
|
goto inBodyAnyOtherEndTag;
|
|
|
|
# Otherwise, if there is such a node, but that node is not in the stack
|
|
# of open elements, then this is a parse error; remove the element from
|
|
# the list, and abort these steps.
|
|
$nodeName=$formattingElement->nodeName;
|
|
$stackPos=array_search($formattingElement,static::$stack,true);
|
|
if($stackPos===false)
|
|
{
|
|
static::parseError('unexpected end tag',$nodeName,static::$currentNodeName);
|
|
static::activeSplice($activePos);
|
|
break;
|
|
}
|
|
# Otherwise, if there is such a node, and that node is also in the
|
|
# stack of open elements, but the element is not in scope, then this is
|
|
# a parse error; ignore the token, and abort these steps.
|
|
elseif(!static::inScope($formattingElement))
|
|
{
|
|
static::parseError('unexpected end tag',$nodeName,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
# Otherwise, there is a formatting element and that element is in the
|
|
# stack and is in scope. If the element is not the current node, this
|
|
# is a parse error. In any case, proceed with the algorithm as written
|
|
# in the following steps.
|
|
elseif($formattingElement!==static::$currentNode) {
|
|
static::parseError('unexpected end tag',$nodeName,static::$currentNodeName);
|
|
}
|
|
|
|
# Let the furthest block be the topmost node in the stack of open
|
|
# elements that is lower in the stack than the formatting element, and
|
|
# is an element in the special category. There might not be one.
|
|
$furthestBlock=null;
|
|
for($fbPos=$stackPos+1;$fbPos<static::$stackSize;$fbPos++)
|
|
{
|
|
$current=static::$stack[$fbPos];
|
|
if(static::isSpecial($current))
|
|
{
|
|
$furthestBlock=$current;
|
|
break;
|
|
}
|
|
}
|
|
|
|
# If there is no furthest block, then the UA must skip the subsequent
|
|
# steps and instead just pop all the nodes from the bottom of the
|
|
# stack of open elements, from the current node up to and including
|
|
# the formatting element, and remove the formatting element from the
|
|
# list of active formatting elements.
|
|
if(is_null($furthestBlock))
|
|
{
|
|
while($last = end(static::$stack)) {
|
|
static::stackPop();
|
|
if ($last->isSameNode($formattingElement)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
static::activeSplice($activePos);
|
|
break;
|
|
}
|
|
|
|
# Let the common ancestor be the element immediately above the
|
|
# formatting element in the stack of open elements.
|
|
$commonAncestor=static::$stack[$stackPos-1];
|
|
|
|
# Let a bookmark note the position of the formatting element in the
|
|
# list of active formatting elements relative to the elements on
|
|
# either side of it in the list.
|
|
$bookmark=$activePos;
|
|
$stackNodePos=$fbPos;
|
|
|
|
# Let node and last node be the furthest block.
|
|
$node=$furthestBlock;
|
|
$lastNode=$furthestBlock;
|
|
for($loop2=0;$loop2<3;$loop2++)
|
|
{
|
|
# Let node be the element immediately above node in the stack of open
|
|
# elements, or if node is no longer in the stack of open elements
|
|
# (e.g. because it got removed by the next step), the element that was
|
|
# immediately above node in the stack of open elements before node was
|
|
# removed.
|
|
$stackNodePos--;
|
|
$node=static::$stack[$stackNodePos];
|
|
|
|
# If node is not in the list of active formatting elements, then remove
|
|
# node from the stack of open elements and then go back to the step
|
|
# labeled inner loop.
|
|
$activeNodePos=array_search($node,static::$active,true);
|
|
if($activeNodePos===false)
|
|
{
|
|
static::stackSplice($stackNodePos);
|
|
continue;
|
|
}
|
|
|
|
# Otherwise, if node is the formatting element, then go to the next
|
|
# step in the overall algorithm.
|
|
if($node->isSameNode($formattingElement))
|
|
break;
|
|
|
|
# Create an element for the token for which the element node was
|
|
# created, replace the entry for node in the list of active formatting
|
|
# elements with an entry for the new element, replace the entry for
|
|
# node in the stack of open elements with an entry for the new
|
|
# element, and let node be the new element.
|
|
$newNode=$node->cloneNode();
|
|
static::activeSplice($activeNodePos,1,$newNode);
|
|
static::stackSplice($stackNodePos,1,$newNode);
|
|
$node=$newNode;
|
|
|
|
# If last node is the furthest block, then move the aforementioned
|
|
# bookmark to be immediately after the new node in the list of active
|
|
# formatting elements.
|
|
if($lastNode->isSameNode($furthestBlock))
|
|
$bookmark=$activeNodePos+1;
|
|
|
|
# Insert last node into node, first removing it from its previous
|
|
# parent node if any.
|
|
# PHP's DOM takes care of the removal.
|
|
$node->appendChild($lastNode);
|
|
|
|
# Let last node be node.
|
|
$lastNode=$node;
|
|
}
|
|
|
|
# If the common ancestor node is a table, tbody, tfoot, thead, or tr
|
|
# element, then, foster parent whatever last node ended up being in the
|
|
# previous step, first removing it from its previous parent node if any.
|
|
# # PHP's DOM takes care of the removal.
|
|
$commonAncestorName=$commonAncestor->nodeName;
|
|
|
|
if($commonAncestorName=='table' || $commonAncestorName=='tbody' || $commonAncestorName=='tfoot' ||
|
|
$commonAncestorName=='thead' || $commonAncestorName=='tr')
|
|
static::fosterParent($lastNode);
|
|
# Otherwise, append whatever last node ended up being in the previous
|
|
# step to the common ancestor node, first removing it from its
|
|
# previous parent node if any.
|
|
else
|
|
$commonAncestor->appendChild($lastNode);
|
|
|
|
# Create an element for the token for which the formatting element was
|
|
# created.
|
|
$newElement=$formattingElement->cloneNode();
|
|
|
|
# Take all of the child nodes of the furthest block and append them to
|
|
# the element created in the last step.
|
|
while($furthestBlock->firstChild)
|
|
{$newElement->appendChild($furthestBlock->firstChild);}
|
|
|
|
# Append that new element to the furthest block.
|
|
$furthestBlock->appendChild($newElement);
|
|
|
|
# Remove the formatting element from the list of active formatting
|
|
# elements, and insert the new element into the list of active
|
|
# formatting elements at the position of the aforementioned bookmark.
|
|
static::activeSplice(array_search($formattingElement,static::$active,true),1);
|
|
static::activeSplice($bookmark,1,$newElement);
|
|
|
|
# Remove the formatting element from the stack of open elements, and
|
|
# insert the new element into the stack of open elements immediately
|
|
# below the position of the furthest block in that stack.
|
|
static::stackSplice(array_search($formattingElement,static::$stack,true),1);
|
|
static::stackSplice(array_search($furthestBlock,static::$stack,true)+1,1,$newElement);
|
|
}
|
|
}
|
|
elseif($name=='applet' || $name=='marquee' || $name=='object')
|
|
{
|
|
if(!static::inScope($name))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
|
|
# If the current node is not an element with the same tag name as that
|
|
# of the token, then this is a parse error.
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements until an element with
|
|
# the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName==$name)
|
|
break;
|
|
}
|
|
|
|
# Clear the list of active formatting elements up to the last marker.
|
|
while(true)
|
|
{
|
|
$entry=end(static::$active);
|
|
static::activePop();
|
|
if(is_string($entry))
|
|
break;
|
|
}
|
|
}
|
|
elseif($name=='br')
|
|
{
|
|
static::parseError('invalid end tag','br','br');
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'br',
|
|
'selfClosing'=>true));
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
inBodyAnyOtherEndTag:
|
|
|
|
$node=static::$currentNode;
|
|
$nodeName=static::$currentNodeName;
|
|
$nodePos=static::$stackSize-1;
|
|
while(true)
|
|
{
|
|
if($nodeName==$name)
|
|
{
|
|
static::generateImpliedEndTags($name);
|
|
|
|
if($nodeName!=static::$currentNodeName)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
while(true)
|
|
{
|
|
$currentNode=static::$currentNode;
|
|
static::stackPop();
|
|
if($currentNode==$node)
|
|
break 2;
|
|
}
|
|
}
|
|
elseif(in_array($nodeName,static::$specialElements['html']))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
break;
|
|
}
|
|
$nodePos--;
|
|
$node=static::$stack[$nodePos];
|
|
$nodeName=$node->nodeName;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
break;
|
|
|
|
case 'text':
|
|
{
|
|
if($type=='character')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
elseif($type=='eof')
|
|
{
|
|
static::parseError('unexpected eof',static::$currentNodeName);
|
|
# No scripting in this implementation.
|
|
static::stackPop();
|
|
static::$mode=static::$oMode;
|
|
goto reprocessToken;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
# There's no scripting, so script end tags behave just like any other
|
|
# end tags.
|
|
static::stackPop();
|
|
static::$mode=static::$oMode;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in table':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
static::$pendingTableCharacterTokens=array();
|
|
static::$oMode=static::$mode;
|
|
static::$mode='in table text';
|
|
|
|
goto reprocessToken;
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='caption')
|
|
{
|
|
# Clear the stack back to a table context.
|
|
while(static::$currentNodeName!='table' && static::$currentNode!='html')
|
|
{static::stackPop();}
|
|
|
|
static::activePush('caption');
|
|
|
|
static::insertElement($token);
|
|
static::$mode='in caption';
|
|
}
|
|
elseif($name=='colgroup')
|
|
{
|
|
# Clear the stack back to a table context.
|
|
while(static::$currentNodeName!='table' && static::$currentNode!='html')
|
|
{static::stackPop();}
|
|
|
|
static::insertElement($token);
|
|
static::$mode='in column group';
|
|
}
|
|
elseif($name=='col')
|
|
{
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'colgroup'));
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='tbody' || $name=='tfoot' || $name=='thead')
|
|
{
|
|
# Clear the stack back to a table context.
|
|
while(static::$currentNodeName!='table' && static::$currentNode!='html')
|
|
{static::stackPop();}
|
|
|
|
static::insertElement($token);
|
|
static::$mode='in table body';
|
|
}
|
|
elseif($name=='td' || $name=='th' || $name=='tr')
|
|
{
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'tbody'));
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='table')
|
|
{
|
|
static::parseError('unexpected start tag','table','table');
|
|
|
|
# Act as if an end tag token with the tag name "table" had been seen,
|
|
# then, if that token wasn't ignored, reprocess the current token.
|
|
if(static::emitToken(array('type'=>'end tag',
|
|
'name'=>'table'))!==false)
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='style' || $name=='script')
|
|
static::emitToken($token,'in head');
|
|
elseif($name=='input')
|
|
{
|
|
if(!isset($attributes['type']) || strtolower($attributes['type'])!='hidden')
|
|
goto inTableAnythingElse;
|
|
|
|
static::parseError('invalid start tag','input','input');
|
|
static::insertElement($token);
|
|
static::stackPop();
|
|
}
|
|
elseif($name=='form')
|
|
{
|
|
static::parseError('unexpected start tag','form','table');
|
|
if(!is_null(static::$form))
|
|
return false;
|
|
else
|
|
{
|
|
static::$form=static::insertElement($token);
|
|
static::stackPop();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
inTableAnythingElse:
|
|
static::parseError('unexpected '.$type,static::$currentNodeName,$name);
|
|
|
|
if(static::$currentNodeName=='table' || static::$currentNodeName=='tbody' || static::$currentNodeName=='tfoot' ||
|
|
static::$currentNodeName=='thead' || static::$currentNodeName=='tr')
|
|
{
|
|
static::$fosterParenting=true;
|
|
static::emitToken($token,'in body');
|
|
static::$fosterParenting=false;
|
|
}
|
|
else
|
|
static::emitToken($token,'in body');
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='table')
|
|
{
|
|
if(static::$fragment===true)
|
|
{
|
|
if(!static::inScope($name,'table'))
|
|
{
|
|
static::parseError('unexpected end tag','table',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
# Pop elements from this stack until a table element has been popped
|
|
# from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='table')
|
|
break;
|
|
}
|
|
|
|
# Reset the insertion mode appropriately.
|
|
static::resetInsertionMode();
|
|
}
|
|
elseif($name=='body' || $name=='caption' || $name=='col' || $name=='colgroup' || $name=='html' || $name=='tbody' ||
|
|
$name=='td' || $name=='tfoot' || $name=='th' || $name=='thead' || $name=='tr')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
else
|
|
goto inTableAnythingElse;
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
if(static::$currentNodeName!='html')
|
|
static::parseError('unexpected eof',static::$currentNodeName);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in table text':
|
|
{
|
|
if($type=='character')
|
|
static::$pendingTableCharacterTokens[]=$token;
|
|
else
|
|
{
|
|
# If any of the tokens in the pending table character tokens list are
|
|
# character tokens that are not space characters, then reprocess those
|
|
# character tokens using the rules given in the "anything else" entry
|
|
# in the "in table" insertion mode.
|
|
# Otherwise, insert the characters given by the pending table character
|
|
# tokens list into the current node.
|
|
|
|
$tokens='';
|
|
foreach(static::$pendingTableCharacterTokens as $t)
|
|
{$tokens.=$t['data'];}
|
|
|
|
if(!preg_match('/[^'.static::WHITESPACE.']/',$tokens))
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($tokens));
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$tokens,static::$currentNodeName);
|
|
|
|
if(static::$currentNodeName=='table' || static::$currentNodeName=='tbody' || static::$currentNodeName=='tfoot' ||
|
|
static::$currentNodeName=='thead' || static::$currentNodeName=='tr')
|
|
{
|
|
static::$fosterParenting=true;
|
|
static::emitToken(array('type'=>'character',
|
|
'data'=>$tokens),'in body');
|
|
static::$fosterParenting=false;
|
|
}
|
|
else
|
|
static::emitToken($t,'in body');
|
|
}
|
|
# Switch the insertion mode to the original insertion mode and reprocess
|
|
# the token.
|
|
static::$mode=static::$oMode;
|
|
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in caption':
|
|
{
|
|
if($type=='start tag')
|
|
{
|
|
if($name=='caption' || $name=='col' || $name=='colgroup' || $name=='tbody' || $name=='td' ||
|
|
$name=='tfoot' || $name=='th' || $name=='thead' || $name=='tr')
|
|
goto inCaptionEndTagTable;
|
|
else
|
|
goto inCaptionAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='caption')
|
|
{
|
|
if(!static::inScope('caption','table'))
|
|
{
|
|
static::parseError('unexpected end tag','caption',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
|
|
if(static::$currentNodeName!='caption')
|
|
static::parseError('unexpected end tag','caption',static::$currentNodeName);
|
|
|
|
# Pop elements from this stack until a caption element has been popped
|
|
# from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='caption')
|
|
break;
|
|
}
|
|
|
|
# Clear the list of active formatting elements up to the last marker.
|
|
while(true)
|
|
{
|
|
$entry=end(static::$active);
|
|
static::activePop();
|
|
if(is_string($entry))
|
|
break;
|
|
}
|
|
|
|
static::$mode='in table';
|
|
}
|
|
elseif($name=='table')
|
|
{
|
|
inCaptionEndTagTable:
|
|
|
|
# Act as if an end tag with the tag name "caption" had been seen, then,
|
|
# if that token wasn't ignored, reprocess the current token.
|
|
if(static::emitToken(array('type'=>'end tag',
|
|
'name'=>'caption'))!==false)
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='body' || $name=='col' || $name=='colgroup' || $name=='html' ||
|
|
$name=='tbody' || $name=='td' || $name=='tfoot' || $name=='th' ||
|
|
$name=='thead' || $name=='tr')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
else
|
|
goto inCaptionAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
inCaptionAnythingElse:
|
|
static::emitToken($token,'in body');
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in column group':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
inColumnGroupAnythingElse:
|
|
if(static::emitToken(array('type'=>'end tag',
|
|
'name'=>'colgroup'))!==false)
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='col')
|
|
{
|
|
static::insertElement($token);
|
|
static::stackPop();
|
|
# Can't acknowledge the token's self-closing flag.
|
|
}
|
|
else
|
|
goto inColumnGroupAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='colgroup')
|
|
{
|
|
# If the current node is the root html element, then this is a parse
|
|
# error; ignore the token. (fragment case)
|
|
if(static::$currentNode->isSameNode(static::$stack[0]))
|
|
{
|
|
static::parseError('unexpected end tag','colgroup',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::stackPop();
|
|
static::$mode='in table';
|
|
}
|
|
elseif($name=='col')
|
|
static::parseError('unexpected end tag','col',static::$currentNodeName);
|
|
else
|
|
goto inColumnGroupAnythingElse;
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
if(static::$currentNode->isSameNode(static::$stack[0]))
|
|
{
|
|
// STOP PARSING.
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in table body':
|
|
{
|
|
if($type=='start tag')
|
|
{
|
|
if($name=='tr')
|
|
{
|
|
# Clear the stack back to a table body context.
|
|
while(static::$currentNodeName!='tbody' && static::$currentNodeName!='tfoot' && static::$currentNodeName!='thead' && static::$currentNodeName!='html')
|
|
{static::stackPop();}
|
|
|
|
static::insertElement($token);
|
|
static::$mode='in row';
|
|
}
|
|
elseif($name=='th' || $name=='td')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
static::emitToken(array('type'=>'start tag',
|
|
'name'=>'tr'));
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='caption' || $name=='col' || $name=='colgroup' || $name=='tbody' || $name=='tfoot' || $name=='thead')
|
|
goto inTableBodyEndTagTable;
|
|
else
|
|
goto inTableBodyAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='tbody' || $name=='tfoot' || $name=='thead')
|
|
{
|
|
if(!static::inScope($name,'table'))
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
# Clear the stack back to a table body context.
|
|
while(static::$currentNodeName!='tbody' && static::$currentNodeName!='tfoot' && static::$currentNodeName!='thead' && static::$currentNodeName!='html')
|
|
{static::stackPop();}
|
|
|
|
static::stackPop();
|
|
static::$mode='in table';
|
|
}
|
|
elseif($name=='table')
|
|
{
|
|
inTableBodyEndTagTable:
|
|
if(!static::inScope('tbody','table') && !static::inScope('thead','table') && !static::inScope('tfoot','table'))
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Clear the stack back to a table body context.
|
|
while(static::$currentNodeName!='tbody' && static::$currentNodeName!='tfoot' && static::$currentNodeName!='thead' && static::$currentNodeName!='html')
|
|
{static::stackPop();}
|
|
|
|
# Act as if an end tag with the same tag name as the current node
|
|
# ("tbody", "tfoot", or "thead") had been seen, then reprocess the
|
|
# current token.
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>static::$currentNodeName));
|
|
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='body' || $name=='caption' || $name=='col' || $name=='colgroup' || $name=='html' || $name=='td' || $name=='th' || $name=='tr')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
else
|
|
goto inTableBodyAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
inTableBodyAnythingElse:
|
|
static::emitToken($token,'in table');
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in row':
|
|
{
|
|
if($type=='start tag')
|
|
{
|
|
if($name=='th' || $name=='td')
|
|
{
|
|
# Clear the stack back to a table row context.
|
|
while(static::$currentNodeName!='tr' && static::$currentNodeName!='html')
|
|
{static::stackPop();}
|
|
|
|
# Insert an HTML element for the token, then switch the insertion mode
|
|
# to "in cell".
|
|
static::insertElement($token);
|
|
static::$mode='in cell';
|
|
static::activePush($name);
|
|
}
|
|
elseif($name=='caption' || $name=='col' || $name=='colgroup' || $name=='tbody' || $name=='tfoot' ||
|
|
$name=='thead' || $name=='tr')
|
|
goto inRowEndTagTable;
|
|
else
|
|
goto inRowAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='tr')
|
|
{
|
|
if(!static::inScope('tr','table'))
|
|
{
|
|
static::parseError('unexpected end tag','tr',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Clear the stack back to a table row context.
|
|
while(static::$currentNodeName!='tr' && static::$currentNodeName!='html')
|
|
{static::stackPop();}
|
|
|
|
# Pop the current node (which will be a tr element) from the stack of
|
|
# open elements.
|
|
static::stackPop();
|
|
|
|
static::$mode='in table body';
|
|
}
|
|
elseif($name=='table')
|
|
{
|
|
inRowEndTagTable:
|
|
|
|
if(static::emitToken(array('type'=>'end tag',
|
|
'name'=>'tr'))!==false)
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='tbody' || $name=='tfoot' || $name=='thead')
|
|
{
|
|
if(!static::inScope($name,'table'))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'tr'));
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='body' || $name=='caption' || $name=='col' || $name=='colgroup' || $name=='html' ||
|
|
$name=='td' || $name=='th')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
else
|
|
goto inRowAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
inRowAnythingElse:
|
|
static::emitToken($token,'in table');
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in cell':
|
|
{
|
|
if($type=='start tag')
|
|
{
|
|
if($name=='caption' || $name=='col' || $name=='colgroup' || $name=='tbody' || $name=='td' || $name=='tfoot' ||
|
|
$name=='th' || $name=='thead' || $name=='tr')
|
|
{
|
|
if(!static::inScope('td','table') && !static::inScope('th','table'))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Otherwise, close the cell and reprocess the current token.
|
|
if(static::inScope('td','table'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'td'));
|
|
else
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'th'));
|
|
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
goto inCellAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='td' || $name=='th')
|
|
{
|
|
if(!static::inScope($name,'table'))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
static::generateImpliedEndTags();
|
|
if(static::$currentNodeName!=$name)
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
# Pop elements from the stack of open elements stack until an element
|
|
# with the same tag name as the token has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName==$name)
|
|
break;
|
|
}
|
|
|
|
# Clear the list of active formatting elements up to the last marker.
|
|
while(true)
|
|
{
|
|
$entry=end(static::$active);
|
|
static::activePop();
|
|
if(is_string($entry))
|
|
break;
|
|
}
|
|
|
|
static::$mode='in row';
|
|
}
|
|
elseif($name=='body' || $name=='tbody' || $name=='tfoot' || $name=='thead' || $name=='tr')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($name=='table' || $name=='tbody' || $name=='tfoot' || $name=='thead' || $name=='tr')
|
|
{
|
|
if(!static::inScope($name,'table'))
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Otherwise, close the cell and reprocess the current token.
|
|
if(static::inScope('td','table'))
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'td'));
|
|
else
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'th'));
|
|
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
goto inCellAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
inCellAnythingElse:
|
|
static::emitToken($token,'in body');
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in select':
|
|
{
|
|
if($type=='character')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
elseif($type=='comment')
|
|
static::$DOM->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='option')
|
|
{
|
|
if(static::$currentNodeName=='option')
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'option'));
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='optgroup')
|
|
{
|
|
if(static::$currentNodeName=='option')
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'option'));
|
|
|
|
if(static::$currentNodeName=='optgroup')
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'optgroup'));
|
|
|
|
static::insertElement($token);
|
|
}
|
|
elseif($name=='select')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'select'));
|
|
}
|
|
elseif($name=='input' || $name=='keygen' || $name=='textarea')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
|
|
if(!static::inScope('select','select'))
|
|
return false;
|
|
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'select'));
|
|
goto reprocessToken;
|
|
}
|
|
elseif($name=='script')
|
|
static::emitToken($token,'in head');
|
|
else
|
|
{
|
|
inSelectAnythingElse:
|
|
static::parseError('unexpected '.$type,static::$currentNodeName,$name);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
if($name=='optigroup')
|
|
{
|
|
# First, if the current node is an option element, and the node
|
|
# immediately before it in the stack of open elements is an optgroup
|
|
# element, then act as if an end tag with the tag name "option" had been
|
|
# seen.
|
|
if(static::$currentNodeName=='option' && static::$stack[static::$stackSize-2]=='optgroup')
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'option'));
|
|
|
|
# If the current node is an optgroup element, then pop that node from
|
|
# the stack of open elements. Otherwise, this is a parse error; ignore
|
|
# the token.
|
|
if(static::$currentNodeName=='optgroup')
|
|
static::stackPop();
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag','optgroup',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($name=='option')
|
|
{
|
|
if(static::$currentNodeName=='option')
|
|
static::stackPop();
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag','option',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($name=='select')
|
|
{
|
|
if(!static::inScope('select','select'))
|
|
{
|
|
static::parseError('unexpected end tag','option',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
|
|
# Pop elements from the stack of open elements stack until a select
|
|
# element has been popped from the stack.
|
|
while(true)
|
|
{
|
|
$nodeName=static::$currentNodeName;
|
|
static::stackPop();
|
|
if($nodeName=='select')
|
|
break;
|
|
}
|
|
|
|
static::resetInsertionMode();
|
|
}
|
|
else
|
|
goto inSelectAnythingElse;
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
if(static::$fragment===true)
|
|
{
|
|
if(static::$currentNodeName!='html')
|
|
static::parseError('unexpected eof',static::$currentNodeName);
|
|
}
|
|
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in select in table':
|
|
{
|
|
if($type=='start tag')
|
|
{
|
|
if($name=='caption' || $name=='table' || $name=='tbody' || $name=='tfoot' || $name=='thead' ||
|
|
$name=='tr' || $name=='td' || $name=='th')
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'select'));
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
goto inSelectInTableAnythingElse;
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='caption' || $name=='table' || $name=='tbody' || $name=='tfoot' || $name=='thead' ||
|
|
$name=='tr' || $name=='td' || $name=='th')
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
|
|
if(static::inScope($name,'table'))
|
|
{
|
|
static::emitToken(array('type'=>'end tag',
|
|
'name'=>'select'));
|
|
goto reprocessToken;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
else
|
|
goto inSelectInTableAnythingElse;
|
|
}
|
|
else
|
|
{
|
|
inSelectInTableAnythingElse:
|
|
static::emitToken($token,'in select');
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after body':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$stack[0]->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,static::$currentNodeName);
|
|
afterBodyAnythingElse:
|
|
static::$mode='body';
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$stack[0]->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
goto afterBodyAnythingElse;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='html')
|
|
{
|
|
if(static::$fragment===true)
|
|
{
|
|
static::parseError('unexpected end tag','html',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
static::$mode='after after body';
|
|
}
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
goto afterBodyAnythingElse;
|
|
}
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'in frameset':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='frameset')
|
|
static::insertElement($token);
|
|
elseif($name=='frame')
|
|
{
|
|
static::insertElement($token);
|
|
static::stackPop();
|
|
|
|
# Can't acknowledge the token's self-closing flag.
|
|
}
|
|
elseif($name=='noframes')
|
|
static::insertElement($token,'in head');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='frameset')
|
|
{
|
|
if(static::$fragment===true)
|
|
{
|
|
if(static::$currentNode->isSameNode(static::$stack[0]))
|
|
{
|
|
static::parseError('unexpected start tag','frameset',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static::stackPop();
|
|
|
|
if(static::$fragment===false && static::$currentNode!='frameset')
|
|
static::$mode='after frameset';
|
|
}
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
if(static::$currentNode!==static::$stack[0])
|
|
static::parseError('unexpected eof',static::$currentNodeName);
|
|
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after frameset':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::$currentNode->appendChild(static::$DOM->createTextNode($data));
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$currentNode->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
{
|
|
static::parseError('unexpected doctype',static::$currentNodeName);
|
|
return false;
|
|
}
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='noframes')
|
|
static::emitToken($token,'in head');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
if($name=='html')
|
|
static::$mode='after after frameset';
|
|
else
|
|
{
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
return false;
|
|
}
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after after body':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::emitToken($token,'in body');
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,static::$currentNodeName);
|
|
afterAfterBodyAnythingElse:
|
|
static::$mode='in body';
|
|
goto reprocessToken;
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$DOM->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
static::emitToken($token,'in body');
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
goto afterAfterBodyAnythingElse;
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
goto afterAfterBodyAnythingElse;
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 'after after frameset':
|
|
{
|
|
if($type=='character')
|
|
{
|
|
if($data=="\t" || $data=="\n" || $data=="\x0c" || $data=="\x0d" || $data==' ')
|
|
static::emitToken($token,'in body');
|
|
else
|
|
{
|
|
static::parseError('unexpected character',$data,static::$currentNodeName);
|
|
static::$mode='in body';
|
|
static::emitToken($token);
|
|
}
|
|
}
|
|
elseif($type=='comment')
|
|
static::$DOM->appendChild(static::$DOM->createComment($data));
|
|
elseif($type=='DOCTYPE')
|
|
static::emitToken($token,'in body');
|
|
elseif($type=='start tag')
|
|
{
|
|
if($name=='html')
|
|
static::emitToken($token,'in body');
|
|
elseif($name=='noframes')
|
|
static::emitToken($token,'in head');
|
|
else
|
|
{
|
|
static::parseError('unexpected start tag',$name,static::$currentNodeName);
|
|
static::$mode='in body';
|
|
static::emitToken($token);
|
|
}
|
|
}
|
|
elseif($type=='end tag')
|
|
{
|
|
# Check for attributes. If they exist trigger a parse error.
|
|
if(is_array($attributes))
|
|
static::parseError('attributes in end tag',$name);
|
|
# Check for self-closing flag. If it exists trigger a parse error.
|
|
if($selfClosing!==null)
|
|
static::parseError('self-closing end tag',$name);
|
|
|
|
static::parseError('unexpected end tag',$name,static::$currentNodeName);
|
|
static::$mode='in body';
|
|
static::emitToken($token);
|
|
}
|
|
elseif($type=='eof')
|
|
{
|
|
// STOP PARSING.
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
# Pops an element off the end of the stack of open elements. Returns the last
|
|
# one popped.
|
|
# It also sets static::$currentNode, static::$currentNodeName, &
|
|
# static::$stackSize.
|
|
protected static function stackPop()
|
|
{
|
|
$node=array_pop(static::$stack);
|
|
|
|
if($node)
|
|
{
|
|
static::$currentNode=end(static::$stack);
|
|
static::$currentNodeName=static::$currentNode->nodeName;
|
|
static::$stackSize--;
|
|
}
|
|
else
|
|
{
|
|
static::$currentNode=null;
|
|
static::$currentNodeName=null;
|
|
static::$stackSize=0;
|
|
}
|
|
|
|
return $node;
|
|
}
|
|
|
|
# Pushes an element onto the end of the stack of open elements.
|
|
# It also sets static::$currentNode, static::$currentNodeName, &
|
|
# static::$stackSize.
|
|
# @param $node Node to push onto the end of the stack.
|
|
protected static function stackPush($node)
|
|
{
|
|
static::$stack[]=$node;
|
|
static::$currentNode=$node;
|
|
static::$currentNodeName=static::$currentNode->nodeName;
|
|
static::$stackSize++;
|
|
}
|
|
|
|
# Removes the elements designated by $offset and $length from
|
|
# the stack.
|
|
# It also sets static::$currentNode, static::$currentNodeName, &
|
|
# static::$stackSize.
|
|
# @param $offset Offset to start from.
|
|
# @param $length The number of elements to slice off.
|
|
# @param $replacement The array to replace the sliced elements with.
|
|
protected static function stackSplice($offset,$length=1,$replacement=null)
|
|
{
|
|
if(!is_null($replacement))
|
|
{
|
|
if(!is_array($replacement))
|
|
$replacement=array($replacement);
|
|
}
|
|
|
|
array_splice(static::$stack,$offset,$length,$replacement);
|
|
static::$currentNode=end(static::$stack);
|
|
static::$currentNodeName=static::$currentNode->nodeName;
|
|
static::$stackSize=sizeof(static::$stack);
|
|
}
|
|
|
|
# Pops an element off the end of the list of active formatting elements.
|
|
# Returns the last one popped.
|
|
# It also sets static::$activeSize.
|
|
# @param $count Number of times to pop elements off the list. Defaults to 1.
|
|
protected static function activePop($count=1)
|
|
{
|
|
$output=array_pop(static::$active);
|
|
|
|
if($output)
|
|
static::$activeSize--;
|
|
else
|
|
static::$activeSize=0;
|
|
|
|
return $output;
|
|
}
|
|
|
|
# Pushes an element onto the end of the list of active formatting elements.
|
|
# It also sets static::$activeSize.
|
|
# @param $node Node to push onto the end of the list.
|
|
# @param $token Token used for checking againstwithin the list. Defaults to
|
|
# null.
|
|
protected static function activePush($node,$token=null)
|
|
{
|
|
# If there are already three elements in the list of active formatting
|
|
# elements after the last list marker, if any, or anywhere in the list if
|
|
# there are no list markers, that have the same tag name, namespace, and
|
|
# attributes as element, then remove the earliest such element from the list
|
|
# of active formatting elements. For these purposes, the attributes must be
|
|
# compared as they were when the elements were created by the parser; two
|
|
# elements have the same attributes if all their parsed attributes can be
|
|
# paired such that the two attributes in each pair have identical names,
|
|
# namespaces, and values (the order of the attributes does not matter).
|
|
|
|
# It's better to add the node to the list first so that if $node is a marker
|
|
# it just returns after adding the node to the list. This implementation of
|
|
# the Noah's Ark algorithm just compensates for the extra item in the stack
|
|
# by beginning its reverse iteration through the list at the next to the last
|
|
# item in the list.
|
|
static::$active[]=$node;
|
|
static::$activeSize++;
|
|
|
|
if(is_string($node))
|
|
return;
|
|
|
|
$count=0;
|
|
# Although it's thoroughly insane to rewrite much of this loop it's much
|
|
# faster to check if there's attributes on the token first than in each
|
|
# iteration of the loop.
|
|
if(isset($token['attributes']))
|
|
{
|
|
for($loop=static::$activeSize-2;$loop>=0;$loop--)
|
|
{
|
|
$current=static::$active[$loop];
|
|
|
|
if(is_string($current))
|
|
break;
|
|
|
|
if($current->nodeName!=$token['name'])
|
|
continue;
|
|
if($current->namespaceURI!=$token['namespace'])
|
|
continue;
|
|
if(!$current->hasAttributes())
|
|
continue;
|
|
|
|
$attributes=$token['attributes'];
|
|
$attr=$current->attributes;
|
|
$attrLen=$attr->length;
|
|
for($loop2=0;$loop2<$attrLen;$loop2++)
|
|
{
|
|
$item=$attr->item($loop2);
|
|
$name=$item->nodeName;
|
|
if(!array_key_exists($name,$attributes))
|
|
continue 2;
|
|
if($attributes[$name]!=$item->value)
|
|
continue 2;
|
|
}
|
|
|
|
if($count==2)
|
|
{
|
|
static::activeSplice($loop);
|
|
continue;
|
|
}
|
|
$count++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for($loop=static::$activeSize-2;$loop>=0;$loop--)
|
|
{
|
|
$current=static::$active[$loop];
|
|
|
|
if(is_string($current))
|
|
break;
|
|
|
|
if($current->nodeName!=$token['name'])
|
|
continue;
|
|
if($current->namespaceURI!=$token['namespace'])
|
|
continue;
|
|
if($current->hasAttributes())
|
|
continue;
|
|
|
|
if($count==2)
|
|
{
|
|
static::activeSplice($loop);
|
|
continue;
|
|
}
|
|
$count++;
|
|
}
|
|
}
|
|
}
|
|
|
|
# Removes the elements designated by $offset and $length from the list of
|
|
# active formatting elements.
|
|
# It also sets static::$activeSize.
|
|
# @param $offset Offset to start from.
|
|
# @param $length The number of elements to slice off.
|
|
# @param $replacement The array to replace the sliced elements with.
|
|
protected static function activeSplice($offset,$length=1,$replacement=null)
|
|
{
|
|
if(!is_null($replacement))
|
|
{
|
|
if(!is_array($replacement))
|
|
$replacement=array($replacement);
|
|
}
|
|
|
|
array_splice(static::$active,$offset,$length,$replacement);
|
|
static::$activeSize=sizeof(static::$active);
|
|
}
|
|
|
|
# Method which reconstructs the active formatting elements.
|
|
protected static function activeReconstruct()
|
|
{
|
|
# If there are no entries in the list of active formatting elements, then
|
|
# there is nothing to reconstruct; stop this algorithm.
|
|
if(static::$activeSize==0)
|
|
return;
|
|
|
|
# If the last (most recently added) entry in the list of active formatting
|
|
# elements is a marker, or if it is an element that is in the stack of open
|
|
# elements, then there is nothing to reconstruct; stop this algorithm.
|
|
# Let entry be the last (most recently added) element in the list of active
|
|
# formatting elements.
|
|
$entry=end(static::$active);
|
|
if(is_string($entry) || in_array($entry,static::$stack,true))
|
|
return;
|
|
$key=static::$activeSize-1;
|
|
|
|
activeReconstructStep4:
|
|
# If there are no entries before entry in the list of active formatting
|
|
# elements, then jump to step 8.
|
|
if($key==0)
|
|
goto activeReconstructStep8;
|
|
|
|
# Let entry be the entry one earlier than entry in the list of active
|
|
# formatting elements.
|
|
$key--;
|
|
$entry=static::$active[$key];
|
|
|
|
# If entry is neither a marker nor an element that is also in the stack of
|
|
# open elements, go to step 4.
|
|
if(!is_string($entry) && !in_array($entry,static::$stack,true)) {
|
|
goto activeReconstructStep4;
|
|
}
|
|
|
|
activeReconstructStep7:
|
|
# Let entry be the element one later than entry in the list of active
|
|
# formatting elements.
|
|
$key++;
|
|
$entry=static::$active[$key];
|
|
|
|
# Create an element for the token for which the element entry was created, to
|
|
# obtain new element.
|
|
activeReconstructStep8:
|
|
$newElement=$entry->cloneNode();
|
|
|
|
# Append new element to the current node and push it onto the stack of open
|
|
# elements so that it is the new current node.
|
|
# Foster parenting stuff here for the purpose of processing tables. Described
|
|
# first in §13.2.5.4.9 under "Anything Else". This implementation uses a flag
|
|
# to determine if foster parenting is necessary.
|
|
if(static::$fosterParenting && (static::$currentNodeName=='table' || static::$currentNodeName=='tbody' ||
|
|
static::$currentNodeName=='tfoot' || static::$currentNodeName=='thead' ||
|
|
static::$currentNodeName=='tr'))
|
|
{
|
|
static::fosterParent($newElement);
|
|
static::stackPush($newElement);
|
|
}
|
|
else
|
|
{
|
|
static::$currentNode->appendChild($newElement);
|
|
static::stackPush($newElement);
|
|
}
|
|
|
|
# Replace the entry for entry in the list with an entry for new element.
|
|
static::activeSplice($key,1,$newElement);
|
|
|
|
# If the entry for new element in the list of active formatting elements is
|
|
# not the last entry in the list, return to step 7.
|
|
if($key!=static::$activeSize-1)
|
|
goto activeReconstructStep7;
|
|
}
|
|
|
|
# Method to insert an element into the DOM tree. Returns the node that was inserted.
|
|
# @param $token Token to be inserted as a node.
|
|
# @param $stack Flag specifying whether to append the node to the stack. Initially true.
|
|
protected static function insertElement($token,$stack=true)
|
|
{
|
|
if(!isset($token['namespace']))
|
|
{
|
|
$node=static::$DOM->createElement($token['name']);
|
|
|
|
if(isset($token['attributes']))
|
|
{
|
|
$attributes = $token['attributes'];
|
|
# PHP bug workaround.
|
|
/* if(isset($attributes['id']))
|
|
{
|
|
$id=static::$DOM->createAttribute('xml:id');
|
|
$id->appendChild(static::$DOM->createTextNode($attributes['id']));
|
|
$node->appendChild($id);
|
|
$node->setIdAttribute('xml:id',true);
|
|
unset($attributes['id']);
|
|
} */
|
|
|
|
foreach($attributes as $key=>$value)
|
|
{
|
|
if(!is_array($value))
|
|
$node->setAttribute($key,(is_null($value)) ? $key : $value);
|
|
else
|
|
$node->setAttributeNS($value['namespace'],$key,$value['value']);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
# Spec states to do this after the element is created, but it's FAR quicker
|
|
# to trigger the errors prior to adjusting foreign attributes and before
|
|
# creating the element.
|
|
if(isset($token['attributes']['xmlns']) && $token['attributes']['xmlns']!=$token['namespace'])
|
|
static::parseError('invalid foreign attribute',$token['name'],'xmlns',$token['namespace']);
|
|
if(isset($token['attributes']['xmlns:xlink']) && $token['attributes']['xmlns:xlink']!='http://www.w3.org/1999/xlink')
|
|
static::parseError('invalid foreign attribute',$token['name'],'xmlns:xlink','http://www.w3.org/1999/xlink');
|
|
|
|
$node=static::$DOM->createElementNS($token['namespace'],$token['name']);
|
|
|
|
# Instead adjust foreign attributes as they are added into the element.
|
|
if(isset($token['attributes']))
|
|
{
|
|
foreach($token['attributes'] as $key=>$value)
|
|
{
|
|
if(!isset(static::$foreignAttributes[$key]))
|
|
$node->setAttribute($key,(is_null($value)) ? $key : $value);
|
|
else
|
|
{
|
|
# Creating an actual attribute node and appending it is necessary due to
|
|
# some fucked up way PHP's DOM handles namespaced attributes.
|
|
$attr=static::$DOM->createAttributeNS(static::$foreignAttributes[$key],$key);
|
|
$attr->value=$value;
|
|
$node->appendChild($attr);
|
|
|
|
//$node->setAttributeNS(static::$foreignAttributes[$key],$key,$value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# Foster parenting stuff here for the purpose of processing tables. Described
|
|
# first in §13.2.5.4.9 under "Anything Else". This implementation uses a flag
|
|
# to determine if foster parenting is necessary.
|
|
if(static::$fosterParenting && (static::$currentNodeName=='table' || static::$currentNodeName=='tbody' ||
|
|
static::$currentNodeName=='tfoot' || static::$currentNodeName=='thead' ||
|
|
static::$currentNodeName=='tr'))
|
|
static::fosterParent($node);
|
|
else
|
|
{
|
|
static::$currentNode->appendChild($node);
|
|
# There's no navigation of a browsing context necessary in this implementation.
|
|
|
|
if($stack===true)
|
|
static::stackPush($node);
|
|
}
|
|
|
|
return $node;
|
|
}
|
|
|
|
# Checks if a particular element or element type is in scope. Returns true or false.
|
|
# @param $element The target element. Can be either a string or a DOMElement.
|
|
# @param $scope Scope type to check for. Defaults to null.
|
|
protected static function inScope($element,$scope=null)
|
|
{
|
|
$node=static::$currentNode;
|
|
$key=static::$stackSize-1;
|
|
|
|
while(true)
|
|
{
|
|
$name=$node->nodeName;
|
|
$namespace=$node->namespaceURI;
|
|
|
|
$check=(is_string($element)) ? $name : $node;
|
|
if($check==$element)
|
|
return true;
|
|
elseif($namespace==null)
|
|
{
|
|
switch($scope)
|
|
{
|
|
case null:
|
|
{
|
|
if($name=='applet' || $name=='caption' || $name=='html' || $name=='table' ||
|
|
$name=='td' || $name=='th' || $name=='marquee' || $name=='object' || $name=='#document-fragment')
|
|
return false;
|
|
}
|
|
break;
|
|
case 'list item':
|
|
{
|
|
if($name=='applet' || $name=='caption' || $name=='html' || $name=='table' ||
|
|
$name=='td' || $name=='th' || $name=='marquee' || $name=='object' ||
|
|
$name=='ol' || $name=='ul' || $name=='#document-fragment')
|
|
return false;
|
|
}
|
|
break;
|
|
case 'button':
|
|
{
|
|
if($name=='applet' || $name=='caption' || $name=='html' || $name=='table' ||
|
|
$name=='td' || $name=='th' || $name=='marquee' || $name=='object' ||
|
|
$name=='button' || $name=='#document-fragment')
|
|
return false;
|
|
}
|
|
break;
|
|
case 'table':
|
|
{
|
|
if($name=='html' || $name=='table' || $name=='#document-fragment')
|
|
return false;
|
|
}
|
|
break;
|
|
case 'select':
|
|
{
|
|
if($name=='optgroup' || $name=='option')
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
elseif($scope!='table' && $scope!='select')
|
|
{
|
|
if($namespace=='http://www.w3.org/1998/Math/MathML' &&
|
|
($name=='mi' || $name=='mo' || $name=='mn' || $name=='ms' || $name='mtext' ||
|
|
$name=='annotation-xml'))
|
|
return false;
|
|
elseif($namespace=='http://www.w3.org/2000/svg' &&
|
|
($name=='foreignObject' || $name=='desc' || $name=='title'))
|
|
return false;
|
|
}
|
|
|
|
$key--;
|
|
$node=static::$stack[$key];
|
|
}
|
|
}
|
|
|
|
# Generates implied end tags.
|
|
# @param $exclusion A given element name to exclude from the array of implied
|
|
# elements. Defaults to null.
|
|
protected static function generateImpliedEndTags($exclusion=null)
|
|
{
|
|
$elements=array_diff(static::$impliedElements,array($exclusion));
|
|
|
|
while(in_array(static::$currentNodeName,$elements))
|
|
{static::stackPop();}
|
|
}
|
|
|
|
# Foster parents a given node.
|
|
# @param $node The node to foster parent.
|
|
protected static function fosterParent($node)
|
|
{
|
|
# The foster parent element is the parent element of the last table element
|
|
# in the stack of open elements, if there is a table element and it has such
|
|
# a parent element.
|
|
$fosterParent=null;
|
|
|
|
for($loop=static::$stackSize-1;$loop>=0;$loop--)
|
|
{
|
|
$current=static::$stack[$loop];
|
|
if($current->nodeName=='table')
|
|
{
|
|
$fosterParent=$current->parentNode;
|
|
$fosterParent->insertBefore($node,$current);
|
|
return;
|
|
}
|
|
}
|
|
if(static::$fragment===true)
|
|
{
|
|
if(is_null($fosterParent))
|
|
$fosterParent=static::$stack[0];
|
|
# WHAT THE FUCK?!
|
|
# DO THIS.
|
|
}
|
|
}
|
|
|
|
# Checks to see if a particular node is an element that requires special
|
|
# processing.
|
|
# @param $node Node to check if it is special.
|
|
protected static function isSpecial($node)
|
|
{
|
|
switch($node->namespaceURI)
|
|
{
|
|
case null: $specialElements=static::$specialElements['html'];
|
|
break;
|
|
case 'http://www.w3.org/1998/Math/MathML': $specialElements=static::$specialElements['mathml'];
|
|
break;
|
|
case 'http://www.w3.org/2000/svg': $specialElements=static::$specialElements['svg'];
|
|
break;
|
|
default: $specialElements=static::$specialElements['html'];
|
|
}
|
|
|
|
$nodeName=$node->nodeName;
|
|
return in_array($nodeName,$specialElements);
|
|
}
|
|
|
|
# Resets the insertion mode.
|
|
protected static function resetInsertionMode()
|
|
{
|
|
# Let last be false.
|
|
$last=false;
|
|
|
|
# Let node be the last node in the stack of open elements.
|
|
$node=static::$currentNode;
|
|
$nodeName=static::$currentNodeName;
|
|
$nodePos=static::$stackSize-1;
|
|
|
|
# A lot of code is being repeated here because one check for a fragment
|
|
# is much faster than several per loop.
|
|
if(static::$fragment===false)
|
|
{
|
|
while(true)
|
|
{
|
|
# If node is a td or th element and last is false, then switch the insertion
|
|
# mode to "in cell" and abort these steps.
|
|
if(($nodeName=='td' || $nodeName=='th') && $last===false)
|
|
{
|
|
static::$mode='in cell';
|
|
return;
|
|
}
|
|
|
|
# If node is a tr element, then switch the insertion mode to "in row" and
|
|
# abort these steps.
|
|
if($nodeName=='tr')
|
|
{
|
|
static::$mode='in row';
|
|
return;
|
|
}
|
|
|
|
# If node is a tbody, thead, or tfoot element, then switch the insertion mode
|
|
# to "in table body" and abort these steps.
|
|
if($nodeName=='tbody' || $nodeName=='thead' || $nodeName=='tfoot')
|
|
{
|
|
static::$mode='in table body';
|
|
return;
|
|
}
|
|
|
|
# If node is a caption element, then switch the insertion mode to
|
|
# "in caption" and abort these steps.
|
|
if($nodeName=='caption')
|
|
{
|
|
static::$mode='in caption';
|
|
return;
|
|
}
|
|
|
|
# If node is a body element, then switch the insertion mode to
|
|
# "in body" and abort these steps.
|
|
if($nodeName=='body')
|
|
{
|
|
static::$mode='in body';
|
|
return;
|
|
}
|
|
|
|
$nodePos--;
|
|
$node=static::$stack[$nodePos];
|
|
$nodeName=$node->nodeName;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while(true)
|
|
{
|
|
# If node is the first node in the stack of open elements, then set last to
|
|
# true and set node to the context element. (fragment case)
|
|
if($node->isSameNode(static::$stack[0]))
|
|
{
|
|
$last=true;
|
|
$node=static::$context;
|
|
$nodeName=$node->nodeName;
|
|
}
|
|
|
|
# If node is a select element, then switch the insertion mode to "in select"
|
|
# and abort these steps. (fragment case)
|
|
if($nodeName=='select')
|
|
{
|
|
static::$mode='in select';
|
|
return;
|
|
}
|
|
|
|
# If node is a td or th element and last is false, then switch the insertion
|
|
# mode to "in cell" and abort these steps.
|
|
if(($nodeName=='td' || $nodeName=='th') && $last===false)
|
|
{
|
|
static::$mode='in cell';
|
|
return;
|
|
}
|
|
|
|
# If node is a tr element, then switch the insertion mode to "in row" and
|
|
# abort these steps.
|
|
if($nodeName=='tr')
|
|
{
|
|
static::$mode='in row';
|
|
return;
|
|
}
|
|
|
|
# If node is a tbody, thead, or tfoot element, then switch the insertion mode
|
|
# to "in table body" and abort these steps.
|
|
if($nodeName=='tbody' || $nodeName=='thead' || $nodeName=='tfoot')
|
|
{
|
|
static::$mode='in table body';
|
|
return;
|
|
}
|
|
|
|
# If node is a caption element, then switch the insertion mode to
|
|
# "in caption" and abort these steps.
|
|
if($nodeName=='caption')
|
|
{
|
|
static::$mode='in caption';
|
|
return;
|
|
}
|
|
|
|
# If node is a colgroup element, then switch the insertion mode to
|
|
# "in column group" and abort these steps. (fragment case)
|
|
if($nodeName=='colgroup')
|
|
{
|
|
static::$mode='in column group';
|
|
return;
|
|
}
|
|
|
|
# If node is a table element, then switch the insertion mode to
|
|
# "in table" and abort these steps.
|
|
if($nodeName=='table')
|
|
{
|
|
static::$mode='in table';
|
|
return;
|
|
}
|
|
|
|
# If node is a head element, then switch the insertion mode to
|
|
# "in body" ("in body"! not "in head"!) and abort these steps.
|
|
# (fragment case)
|
|
# If node is a body element, then switch the insertion mode to
|
|
# "in body" and abort these steps.
|
|
if($nodeName=='head' || $nodeName=='body')
|
|
{
|
|
static::$mode='in body';
|
|
return;
|
|
}
|
|
|
|
# If node is a frameset element, then switch the insertion mode to
|
|
# "in frameset" and abort these steps. (fragment case)
|
|
if($nodeName=='frameset')
|
|
{
|
|
static::$mode='in frameset';
|
|
return;
|
|
}
|
|
|
|
# If node is an html element, then switch the insertion mode to
|
|
# "before head" Then, abort these steps. (fragment case)
|
|
if($nodeName=='html')
|
|
{
|
|
static::$mode='before head';
|
|
return;
|
|
}
|
|
|
|
# If last is true, then switch the insertion mode to "in body" and abort
|
|
# these steps. (fragment case)
|
|
if($last===true)
|
|
{
|
|
static::$mode='in body';
|
|
return;
|
|
}
|
|
|
|
$nodePos--;
|
|
$node=static::$stack[$nodePos];
|
|
$nodeName=static::$stack->nodeName;
|
|
}
|
|
}
|
|
}
|
|
|
|
public static function errorHandler($level,$message,$file,$line,$context)
|
|
{
|
|
switch($level)
|
|
{
|
|
case E_USER_WARNING: echo 'HTML5 Parse Error: '.$message."\n";
|
|
break;
|
|
case E_USER_ERROR: echo 'HTML5 Fatal Error: '.$message.' in '.$file.' on line '.$line."\n";
|
|
break;
|
|
default: return false;
|
|
}
|
|
|
|
if(static::$debug)
|
|
echo 'state: '.static::$state."\n".'mode: '.static::$mode."\n";
|
|
}
|
|
|
|
public static function parseError($error)
|
|
{
|
|
$message=static::$parseErrors[$error];
|
|
if(is_null($message))
|
|
return static::fatalError('invalid parse error',__METHOD__,$error);
|
|
|
|
$args=func_get_args();
|
|
$length=sizeof($args);
|
|
if($length>1)
|
|
{
|
|
array_shift($args);
|
|
|
|
$args=array_map(function($value)
|
|
{
|
|
if($value==="\n")
|
|
return 'Newline';
|
|
|
|
return "'$value'";
|
|
},$args);
|
|
|
|
$message=call_user_func_array('sprintf',array_merge([$message],$args));
|
|
}
|
|
|
|
trigger_error($message,E_USER_WARNING);
|
|
}
|
|
|
|
public static function fatalError($error,$method)
|
|
{
|
|
if(!is_string($method))
|
|
return static::fatalError('method expected',__METHOD__);
|
|
|
|
$message=static::$fatalErrors[$error];
|
|
if(is_null($message))
|
|
return static::fatalError('invalid fatal error',__METHOD__,$error);
|
|
|
|
$args=func_get_args();
|
|
$length=sizeof($args);
|
|
if($length>2)
|
|
{
|
|
array_shift($args);
|
|
array_shift($args);
|
|
|
|
$args=array_map(function($value)
|
|
{
|
|
if($value==="\n")
|
|
return 'Newline';
|
|
|
|
return "'$value'";
|
|
},$args);
|
|
|
|
$message=call_user_func_array('sprintf',array_merge(array($message),$args));
|
|
}
|
|
|
|
trigger_error($method.': '.$message,E_USER_ERROR);
|
|
return false;
|
|
}
|
|
}
|
|
|