File "_tokenizer.cpython-36.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/lib/python3.6/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-36.pyc
File size: 41.09 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit

3

Pf$+@sddlmZmZmZddlmZddlmZddl	m
Z
ddl	mZddl	mZm
Z
ddl	mZmZmZdd	l	mZmZdd
l	mZddlmZddlmZeeZGd
ddeZdS))absolute_importdivisionunicode_literals)unichr)deque)spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypes
tagTokenTypes)replacementCharacters)HTMLInputStream)TriecsdeZdZdZdfdd	ZddZddZdd
dZdd
ZddZ	ddZ
ddZddZddZ
ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zd<d=Z d>d?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZ<dvdwZ=dxdyZ>dzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS)
HTMLTokenizera	 This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    NcsFt|f||_||_d|_g|_|j|_d|_d|_t	t
|jdS)NF)rstreamparserZ
escapeFlagZ
lastFourChars	dataStatestateescapecurrentTokensuperr__init__)selfrrkwargs)	__class__ /usr/lib/python3.6/_tokenizer.pyr"szHTMLTokenizer.__init__ccs\tg|_xL|jrVx&|jjr:td|jjjddVqWx|jrR|jjVq>WqWdS)z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr)typedataN)r
tokenQueuerrerrorsrpoppopleft)rr r r!__iter__1s


zHTMLTokenizer.__iter__c	%Cs(t}d}|rt}d}g}|jj}x(||krJ|tk	rJ|j||jj}q$Wtdj||}|tkrt|}|j	jt
ddd|idnld|kod	kns|d
krd}|j	jt
ddd|idn(d|kod
knsd|kodknsd|kodknsd|ko4dkns|tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#kr|j	jt
ddd|idyt|}Wn>t
k
r|d6}td|d?Btd7|d8@B}YnX|d9kr$|j	jt
dd:d;|jj||S)<zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
r"z$illegal-codepoint-for-numeric-entity	charAsInt)r#r$datavarsiiiu�riiiiiiiiiiiiiiiiiiiii	i	i
i
iiiii
i
iiiiiiii;z numeric-entity-without-semicolon)r#r$)rr
rcharrappendintjoinrr%r	frozensetchr
ValueErrorunget)	rZisHexZallowedradix	charStackcr-r6vr r r!consumeNumberEntityAs`

&

z!HTMLTokenizer.consumeNumberEntityFc	
Csd}|jjg}|dtksB|dtddfksB|dk	rV||dkrV|jj|dn"|ddkrd}|j|jj|ddkrd	}|j|jj|r|dtks|r|dtkr|jj|d|j|}n4|j	jt
d
dd|jj|jdd
j|}njx8|dtk	rFt
jd
j|s2P|j|jjqWy$t
jd
j|dd}t|}Wntk
rd}YnX|dk	rD|ddkr|j	jt
d
dd|ddkr|r||tks||tks||dkr|jj|jdd
j|}n.t|}|jj|j|d
j||d7}n4|j	jt
d
dd|jj|jdd
j|}|r|jddd|7<n*|tkrd}nd}|j	jt
||ddS)N&r<#FrxXTr"zexpected-numeric-entity)r#r$r,r5znamed-entity-without-semicolon=zexpected-named-entityr$SpaceCharacters
Characters)rFrGrKrKrKrKrKrKrKrK)rr6rrr=r7r
rrBr%rr'r9entitiesTrieZhas_keys_with_prefixZlongest_prefixlenKeyErrorr
r	r)	rallowedChar
fromAttributeoutputr?hexZ
entityNameZentityLengthZ	tokenTyper r r!
consumeEntitysf





zHTMLTokenizer.consumeEntitycCs|j|dddS)zIThis method replaces the need for "entityInAttributeValueState".
        T)rOrPN)rS)rrOr r r!processEntityInAttributesz&HTMLTokenizer.processEntityInAttributecCs|j}|dtkrp|djt|d<|dtdkrp|drR|jjtddd|drp|jjtdd	d|jj||j|_d
S)zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r#nameEndTagr$r"zattributes-in-end-tag)r#r$selfClosingzself-closing-flag-on-end-tagN)	rr	translaterrr%r7rr)rtokenr r r!emitCurrentTokens

zHTMLTokenizer.emitCurrentTokencCs|jj}|dkr|j|_n|dkr.|j|_n|dkrd|jjtddd|jjtdddn`|tkrpdS|t	kr|jjtd	||jj
t	d
dn&|jj
d}|jjtd||dd
S)NrCrDr"zinvalid-codepoint)r#r$rJFrIT)rCrDr[)rr6entityDataStatertagOpenStater%r7rrr
charsUntil)rr$charsr r r!rs&



zHTMLTokenizer.dataStatecCs|j|j|_dS)NT)rSrr)rr r r!r\szHTMLTokenizer.entityDataStatecCs|jj}|dkr|j|_n|dkr.|j|_n|tkr:dS|dkrp|jjtddd|jjtdd	dnT|t	kr|jjtd
||jj
t	ddn&|jj
d}|jjtd||ddS)
NrCrDFr[r"zinvalid-codepoint)r#r$rJu�rIT)rCrDr[)rr6characterReferenceInRcdatarrcdataLessThanSignStaterr%r7rrr^)rr$r_r r r!rcdataStates&



zHTMLTokenizer.rcdataStatecCs|j|j|_dS)NT)rSrbr)rr r r!r`1sz(HTMLTokenizer.characterReferenceInRcdatacCs|jj}|dkr|j|_nh|dkrR|jjtddd|jjtdddn2|tkr^dS|jjd
}|jjtd||dd	S)NrDr[r"zinvalid-codepoint)r#r$rJu�FT)rDr[)	rr6rawtextLessThanSignStaterr%r7rrr^)rr$r_r r r!rawtextState6s


zHTMLTokenizer.rawtextStatecCs|jj}|dkr|j|_nh|dkrR|jjtddd|jjtdddn2|tkr^dS|jjd
}|jjtd||dd	S)NrDr[r"zinvalid-codepoint)r#r$rJu�FT)rDr[)	rr6scriptDataLessThanSignStaterr%r7rrr^)rr$r_r r r!scriptDataStateHs


zHTMLTokenizer.scriptDataStatecCsr|jj}|tkrdS|dkrL|jjtddd|jjtdddn"|jjtd||jjdddS)	NFr[r"zinvalid-codepoint)r#r$rJu�T)rr6rr%r7rr^)rr$r r r!plaintextStateZs

zHTMLTokenizer.plaintextStatecCs|jj}|dkr|j|_n|dkr.|j|_n|tkrVtd|gddd|_|j|_n|dkr|j	j
tddd	|j	j
td
dd	|j|_nt|dkr|j	j
tdd
d	|jj||j
|_n@|j	j
tddd	|j	j
td
dd	|jj||j|_dS)N!/ZStartTagF)r#rUr$rWZselfClosingAcknowledged>r"z'expected-tag-name-but-got-right-bracket)r#r$rJz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerDT)rr6markupDeclarationOpenStatercloseTagOpenStater
rrtagNameStater%r7rr=bogusCommentState)rr$r r r!r]is6









zHTMLTokenizer.tagOpenStatecCs|jj}|tkr0td|gdd|_|j|_n|dkrX|jjtddd|j	|_nn|t
kr|jjtddd|jjtd	d
d|j	|_n0|jjtddd|id
|jj||j|_dS)NrVF)r#rUr$rWrjr"z*expected-closing-tag-but-got-right-bracket)r#r$z expected-closing-tag-but-got-eofrJz</z!expected-closing-tag-but-got-charr$)r#r$r.T)
rr6r
rrrnrr%r7rrr=ro)rr$r r r!rms(





zHTMLTokenizer.closeTagOpenStatecCs|jj}|tkr|j|_n|dkr.|jn~|tkrV|jjt	ddd|j
|_nV|dkrh|j|_nD|dkr|jjt	ddd|jdd	7<n|jd|7<d
S)Nrjr"zeof-in-tag-name)r#r$rir[zinvalid-codepointrUu�T)
rr6rbeforeAttributeNameStaterrZrr%r7rrselfClosingStartTagStater)rr$r r r!rns"






zHTMLTokenizer.tagNameStatecCsP|jj}|dkr"d|_|j|_n*|jjtddd|jj||j	|_dS)Nrir,rJrD)r#r$T)
rr6temporaryBufferrcdataEndTagOpenStaterr%r7rr=rb)rr$r r r!ras

z%HTMLTokenizer.rcdataLessThanSignStatecCsX|jj}|tkr*|j|7_|j|_n*|jjtddd|jj	||j
|_dS)NrJz</)r#r$T)rr6r
rrrcdataEndTagNameStaterr%r7rr=rb)rr$r r r!rss

z#HTMLTokenizer.rcdataEndTagOpenStatecCs|jo|jdj|jjk}|jj}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|j
|j|_nH|tkr|j|7_n0|j
jtdd|jd	|jj||j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rlowerrrrr6rrrprrqrZrr
r%r7r=rb)rappropriater$r r r!rts2



z#HTMLTokenizer.rcdataEndTagNameStatecCsP|jj}|dkr"d|_|j|_n*|jjtddd|jj||j	|_dS)Nrir,rJrD)r#r$T)
rr6rrrawtextEndTagOpenStaterr%r7rr=rd)rr$r r r!rcs

z&HTMLTokenizer.rawtextLessThanSignStatecCsX|jj}|tkr*|j|7_|j|_n*|jjtddd|jj	||j
|_dS)NrJz</)r#r$T)rr6r
rrrawtextEndTagNameStaterr%r7rr=rd)rr$r r r!rws

z$HTMLTokenizer.rawtextEndTagOpenStatecCs|jo|jdj|jjk}|jj}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|j
|j|_nH|tkr|j|7_n0|j
jtdd|jd	|jj||j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rd)rrvr$r r r!rxs2



z$HTMLTokenizer.rawtextEndTagNameStatecCsx|jj}|dkr"d|_|j|_nR|dkrJ|jjtddd|j|_n*|jjtddd|jj	||j
|_dS)	Nrir,rhrJz<!)r#r$rDT)rr6rrscriptDataEndTagOpenStaterr%r7rscriptDataEscapeStartStater=rf)rr$r r r!res


z)HTMLTokenizer.scriptDataLessThanSignStatecCsX|jj}|tkr*|j|7_|j|_n*|jjtddd|jj	||j
|_dS)NrJz</)r#r$T)rr6r
rrscriptDataEndTagNameStaterr%r7rr=rf)rr$r r r!ry,s

z'HTMLTokenizer.scriptDataEndTagOpenStatecCs|jo|jdj|jjk}|jj}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|j
|j|_nH|tkr|j|7_n0|j
jtdd|jd	|jj||j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rf)rrvr$r r r!r{7s2



z'HTMLTokenizer.scriptDataEndTagNameStatecCsJ|jj}|dkr2|jjtddd|j|_n|jj||j|_dS)N-rJ)r#r$T)	rr6r%r7rscriptDataEscapeStartDashStaterr=rf)rr$r r r!rzSs

z(HTMLTokenizer.scriptDataEscapeStartStatecCsJ|jj}|dkr2|jjtddd|j|_n|jj||j|_dS)Nr|rJ)r#r$T)	rr6r%r7rscriptDataEscapedDashDashStaterr=rf)rr$r r r!r}]s

z,HTMLTokenizer.scriptDataEscapeStartDashStatecCs|jj}|dkr2|jjtddd|j|_n|dkrD|j|_nn|dkrz|jjtddd|jjtdddn8|tkr|j	|_n&|jj
d
}|jjtd||dd	S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�T)rDr|r[)rr6r%r7rscriptDataEscapedDashStater"scriptDataEscapedLessThanSignStaterrr^)rr$r_r r r!scriptDataEscapedStategs"




z$HTMLTokenizer.scriptDataEscapedStatecCs|jj}|dkr2|jjtddd|j|_n|dkrD|j|_nn|dkr|jjtddd|jjtddd|j|_n0|t	kr|j
|_n|jjtd|d|j|_d	S)
Nr|rJ)r#r$rDr[r"zinvalid-codepointu�T)rr6r%r7rr~rrrrr)rr$r r r!r{s"






z(HTMLTokenizer.scriptDataEscapedDashStatecCs|jj}|dkr*|jjtdddn|dkr<|j|_n|dkrd|jjtddd|j|_nn|dkr|jjtddd|jjtdd	d|j|_n0|t	kr|j
|_n|jjtd|d|j|_d
S)Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�T)rr6r%r7rrrrfrrr)rr$r r r!r~s&






z,HTMLTokenizer.scriptDataEscapedDashDashStatecCs|jj}|dkr"d|_|j|_n\|tkrT|jjtdd|d||_|j	|_n*|jjtddd|jj
||j|_dS)Nrir,rJrD)r#r$T)rr6rr scriptDataEscapedEndTagOpenStaterr
r%r7r scriptDataDoubleEscapeStartStater=r)rr$r r r!rs


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatecCsP|jj}|tkr"||_|j|_n*|jjtddd|jj	||j
|_dS)NrJz</)r#r$T)rr6r
rr scriptDataEscapedEndTagNameStaterr%r7rr=r)rr$r r r!rs

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatecCs|jo|jdj|jjk}|jj}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|j
|j|_nH|tkr|j|7_n0|j
jtdd|jd	|jj||j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=r)rrvr$r r r!rs2



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatecCs|jj}|ttdBkrR|jjtd|d|jjdkrH|j	|_
q|j|_
nB|tkr|jjtd|d|j|7_n|jj
||j|_
dS)NrirjrJ)r#r$scriptT)rirj)rr6rr:r%r7rrrruscriptDataDoubleEscapedStaterrr
r=)rr$r r r!rs


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs|jj}|dkr2|jjtddd|j|_n|dkrZ|jjtddd|j|_nt|dkr|jjtddd|jjtdddn>|tkr|jjtdd	d|j	|_n|jjtd|dd
S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)
rr6r%r7r scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterr)rr$r r r!rs$





z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|jj}|dkr2|jjtddd|j|_n|dkrZ|jjtddd|j|_n|dkr|jjtddd|jjtddd|j|_nF|t	kr|jjtdd	d|j
|_n|jjtd|d|j|_d
S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7r$scriptDataDoubleEscapedDashDashStaterrrrr)rr$r r r!rs(







z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|jj}|dkr*|jjtdddn|dkrR|jjtddd|j|_n|dkrz|jjtddd|j|_n|dkr|jjtddd|jjtdd	d|j|_nF|t	kr|jjtdd
d|j
|_n|jjtd|d|j|_dS)Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7rrrrfrrr)rr$r r r!rs,







z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|jj}|dkr8|jjtdddd|_|j|_n|jj||j	|_dS)NrirJ)r#r$r,T)
rr6r%r7rrrscriptDataDoubleEscapeEndStaterr=r)rr$r r r!r0s

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|jj}|ttdBkrR|jjtd|d|jjdkrH|j	|_
q|j|_
nB|tkr|jjtd|d|j|7_n|jj
||j|_
dS)NrirjrJ)r#r$rT)rirj)rr6rr:r%r7rrrrurrrr
r=)rr$r r r!r;s


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|jj}|tkr$|jjtdn|tkrJ|jdj|dg|j|_n|dkr\|j	n|dkrn|j
|_n|dkr|jjtd
dd|jdj|dg|j|_n|d
kr|jjtd
dd|jdjddg|j|_nF|t
kr|jjtd
dd|j|_n|jdj|dg|j|_dS)NTr$r,rjri'"rHrDr"z#invalid-character-in-attribute-name)r#r$r[zinvalid-codepointu�z#expected-attribute-name-but-got-eof)rrrHrD)rr6rr^r
rr7attributeNameStaterrZrqr%rrr)rr$r r r!rpKs6










z&HTMLTokenizer.beforeAttributeNameStatecCs|jj}d}d}|dkr&|j|_n0|tkr^|jddd||jjtd7<d}n|dkrld}n|tkr~|j|_n|dkr|j	|_n|d	kr|j
jtd
dd|jdddd
7<d}n|dkr|j
jtd
dd|jddd|7<d}nH|t
kr8|j
jtd
dd|j|_n|jddd|7<d}|r|jdddjt|jddd<xP|jdddD]:\}}|jddd|kr|j
jtd
ddPqW|r|jdS)NTFrHr$rrrjrir[r"zinvalid-codepoint)r#r$u�rrrDz#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerKrK)rrrDrKrKrKrKrKrK)rr6beforeAttributeValueStaterr
rr^rafterAttributeNameStaterqr%r7rrrrXrrZ)rr$ZleavingThisStateZ	emitTokenrU_r r r!risR








&
z HTMLTokenizer.attributeNameStatecCsF|jj}|tkr$|jjtdn|dkr8|j|_n
|dkrJ|jn|tkrp|jdj	|dg|j
|_n|dkr|j|_n|dkr|jj	t
dd	d
|jdj	ddg|j
|_n|dkr|jj	t
ddd
|jdj	|dg|j
|_nF|tkr&|jj	t
ddd
|j|_n|jdj	|dg|j
|_dS)NTrHrjr$r,rir[r"zinvalid-codepoint)r#r$u�rrrDz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rrrD)rr6rr^rrrZr
rr7rrqr%rrr)rr$r r r!rs:











z%HTMLTokenizer.afterAttributeNameStatecCsj|jj}|tkr$|jjtdnB|dkr8|j|_n.|dkrX|j|_|jj|n|dkrl|j|_n|dkr|j	j
tddd|jn|d	kr|j	j
tdd
d|j
dddd
7<|j|_n|dkr|j	j
tddd|j
ddd|7<|j|_nL|tkrD|j	j
tddd|j|_n"|j
ddd|7<|j|_dS)NTrrCrrjr"z.expected-attribute-value-but-got-right-bracket)r#r$r[zinvalid-codepointr$ru�rHrD`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrK)rHrDrrKrK)rr6rr^attributeValueDoubleQuotedStaterattributeValueUnQuotedStater=attributeValueSingleQuotedStater%r7rrZrrr)rr$r r r!rs>










z'HTMLTokenizer.beforeAttributeValueStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdddd	7<nN|t	kr|jjtdd
d|j
|_n&|jdd
d||jjd7<dS)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-double-quoteTrKrK)rrCr[)rr6afterAttributeValueStaterrTr%r7rrrrr^)rr$r r r!rs 




z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdddd	7<nN|t	kr|jjtdd
d|j
|_n&|jdd
d||jjd7<dS)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-single-quoteTrKrK)rrCr[)rr6rrrTr%r7rrrrr^)rr$r r r!rs 




z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|jj}|tkr|j|_n|dkr2|jdn|dkrD|jn|dkr~|jjt	dd	d
|j
ddd|7<n|d
kr|jjt	ddd
|j
dddd7<nV|tkr|jjt	ddd
|j|_n.|j
ddd||jj
tdtB7<dS)NrCrjrrrHrDrr"z0unexpected-character-in-unquoted-attribute-value)r#r$r$rr[zinvalid-codepointu�z eof-in-attribute-value-no-quotesT)rrrHrDrrKrKrK)rCrjrrrHrDrr[)rr6rrprrTrZr%r7rrrrr^r:)rr$r r r!rs,





z)HTMLTokenizer.attributeValueUnQuotedStatecCs|jj}|tkr|j|_n|dkr.|jnp|dkr@|j|_n^|tkrt|jj	t
ddd|jj||j|_n*|jj	t
ddd|jj||j|_dS)Nrjrir"z$unexpected-EOF-after-attribute-value)r#r$z*unexpected-character-after-attribute-valueT)
rr6rrprrZrqrr%r7rr=r)rr$r r r!r s"






z&HTMLTokenizer.afterAttributeValueStatecCs|jj}|dkr&d|jd<|jn^|tkrZ|jjtddd|jj||j	|_
n*|jjtddd|jj||j|_
dS)NrjTrWr"z#unexpected-EOF-after-solidus-in-tag)r#r$z)unexpected-character-after-solidus-in-tag)rr6rrZrr%r7rr=rrrp)rr$r r r!rq4s





z&HTMLTokenizer.selfClosingStartTagStatecCsD|jjd}|jdd}|jjtd|d|jj|j|_dS)Nrjr[u�Comment)r#r$T)	rr^replacer%r7rr6rr)rr$r r r!roFs
zHTMLTokenizer.bogusCommentStatecCs|jjg}|ddkrT|j|jj|ddkrPtddd|_|j|_dSn|ddkrd}x.d&D]&}|j|jj|d'|krjd}PqjW|rtdddddd|_|j|_dSn|d(dkrH|jdk	rH|jj	j
rH|jj	j
d)j|jj	jkrHd}x2d*D]*}|j|jj|d+|krd}PqW|rH|j
|_dS|jjtdddx|rz|jj|jq`W|j|_dS),Nrr|rr,)r#r$TdDoOr@CtTyYpPeEFZDoctype)r#rUpublicIdsystemIdcorrect[Ar"zexpected-dashes-or-doctyperKrKrK)rrrrr@rrrrrrrrr)rrrrrrrKrKrK)rrrrrrrK)rr6r7rrcommentStartStaterdoctypeStaterZtreeZopenElements	namespaceZdefaultNamespacecdataSectionStater%r=r'ro)rr?matchedexpectedr r r!rlUsR


z(HTMLTokenizer.markupDeclarationOpenStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	d|jj|j|j|_nP|t	kr|jjtdd
d|jj|j|j|_n|jd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u�rjzincorrect-commentzeof-in-commentT)rr6commentStartDashStaterr%r7rrrrcommentState)rr$r r r!rs(






zHTMLTokenizer.commentStartStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	d|jj|j|j|_nT|t	kr|jjtdd
d|jj|j|j|_n|jdd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u-�rjzincorrect-commentzeof-in-commentT)rr6commentEndStaterr%r7rrrrr)rr$r r r!rs(






z#HTMLTokenizer.commentStartDashStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<nT|tkr|jjtddd|jj|j|j	|_n|jd||jj
d
7<d	S)Nr|r[r"zinvalid-codepoint)r#r$r$u�zeof-in-commentT)r|r[)rr6commentEndDashStaterr%r7rrrrr^)rr$r r r!rs




zHTMLTokenizer.commentStatecCs|jj}|dkr|j|_n|dkrV|jjtddd|jdd7<|j|_nT|t	kr|jjtddd|jj|j|j
|_n|jdd|7<|j|_d	S)
Nr|r[r"zinvalid-codepoint)r#r$r$u-�zeof-in-comment-end-dashT)rr6rrr%r7rrrrr)rr$r r r!rs 





z!HTMLTokenizer.commentEndDashStatecCs,|jj}|dkr*|jj|j|j|_n|dkrd|jjtddd|jdd7<|j|_n|dkr|jjtdd	d|j	|_n|d
kr|jjtddd|jd|7<nj|t
kr|jjtddd|jj|j|j|_n4|jjtdd
d|jdd|7<|j|_dS)Nrjr[r"zinvalid-codepoint)r#r$r$u--�rhz,unexpected-bang-after-double-dash-in-commentr|z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)rr6r%r7rrrrrcommentEndBangStater)rr$r r r!rs6









zHTMLTokenizer.commentEndStatecCs|jj}|dkr*|jj|j|j|_n|dkrN|jdd7<|j|_n|dkr|jjtddd|jdd	7<|j	|_nT|t
kr|jjtdd
d|jj|j|j|_n|jdd|7<|j	|_dS)Nrjr|r$z--!r[r"zinvalid-codepoint)r#r$u--!�zeof-in-comment-end-bang-stateT)rr6r%r7rrrrrrr)rr$r r r!rs(






z!HTMLTokenizer.commentEndBangStatecCs|jj}|tkr|j|_nj|tkr\|jjtdddd|j	d<|jj|j	|j
|_n*|jjtddd|jj||j|_dS)Nr"z!expected-doctype-name-but-got-eof)r#r$Frzneed-space-after-doctypeT)rr6rbeforeDoctypeNameStaterrr%r7rrrr=)rr$r r r!rs





zHTMLTokenizer.doctypeStatecCs|jj}|tkrn|dkrT|jjtdddd|jd<|jj|j|j|_n|dkr|jjtdddd	|jd
<|j	|_nR|t
kr|jjtdddd|jd<|jj|j|j|_n||jd
<|j	|_dS)
Nrjr"z+expected-doctype-name-but-got-right-bracket)r#r$Frr[zinvalid-codepointu�rUz!expected-doctype-name-but-got-eofT)rr6rr%r7rrrrdoctypeNameStater)rr$r r r!rs.










z$HTMLTokenizer.beforeDoctypeNameStatecCs|jj}|tkr2|jdjt|jd<|j|_n|dkrh|jdjt|jd<|jj	|j|j
|_n|dkr|jj	tddd|jdd7<|j|_nh|t
kr|jj	tdddd	|jd
<|jdjt|jd<|jj	|j|j
|_n|jd|7<dS)NrUrjr[r"zinvalid-codepoint)r#r$u�zeof-in-doctype-nameFrT)rr6rrrXrafterDoctypeNameStaterr%r7rrrr)rr$r r r!r6s,







zHTMLTokenizer.doctypeNameStatecCsR|jj}|tkrn8|dkr8|jj|j|j|_n|tkrd|jd<|jj	||jjt
ddd|jj|j|j|_n|d!krd	}x$d'D]}|jj}||krd}PqW|r|j|_d	SnJ|d(krd	}x(d.D] }|jj}||krd}PqW|r|j|_d	S|jj	||jjt
ddd|id d|jd<|j
|_d	S)/NrjFrr"zeof-in-doctype)r#r$rrTuUbBlLiIr@rsSrrrrrrmMz*expected-space-or-right-bracket-in-doctyper$)r#r$r.)rrrrrrrrrrr@r)rrrrr)rrrrrrrrrrrr)rrrrr)rr6rr%r7rrrrr=rafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)rr$rrr r r!rOsT







z#HTMLTokenizer.afterDoctypeNameStatecCs|jj}|tkr|j|_n|d
krP|jjtddd|jj||j|_nT|t	kr|jjtdddd|j
d<|jj|j
|j|_n|jj||j|_d	S)Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr)rr6r"beforeDoctypePublicIdentifierStaterr%r7rr=rrr)rr$r r r!rs"






z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt	dddd	|jd
<|jj|j|j
|_nh|tkr|jjt	dddd	|jd
<|jj|j|j
|_n(|jjt	dddd	|jd
<|j|_d
S)Nrr,rrrjr"zunexpected-end-of-doctype)r#r$Frzeof-in-doctypezunexpected-char-in-doctypeT)
rr6rr(doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater%r7rrrr)rr$r r r!rs4












z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	dd
|jd<|jj|j|j|_nR|t	kr|jjtdddd
|jd<|jj|j|j|_n|jd|7<d
S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT)
rr6!afterDoctypePublicIdentifierStaterr%r7rrrr)rr$r r r!rs*








z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	dd
|jd<|jj|j|j|_nR|t	kr|jjtdddd
|jd<|jj|j|j|_n|jd|7<d
S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT)
rr6rrr%r7rrrr)rr$r r r!rs*








z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs|jj}|tkr|j|_n|dkr<|jj|j|j|_n|dkrn|jjt	dddd|jd<|j
|_n|dkr|jjt	dddd|jd<|j|_nh|tkr|jjt	dd	dd
|jd<|jj|j|j|_n(|jjt	dddd
|jd<|j
|_dS)
Nrjrr"zunexpected-char-in-doctype)r#r$r,rrzeof-in-doctypeFrT)rr6r-betweenDoctypePublicAndSystemIdentifiersStaterr%r7rrr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterr)rr$r r r!rs6













z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|jj}|tkrn|dkr4|jj|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j	|_nh|t
kr|jjtdddd	|jd
<|jj|j|j|_n(|jjtdddd	|jd
<|j|_dS)
Nrjrr,rrr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT)
rr6rr%r7rrrrrrrr)rr$r r r!rs.










z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|jj}|tkr|j|_n|d
krP|jjtddd|jj||j|_nT|t	kr|jjtdddd|j
d<|jj|j
|j|_n|jj||j|_d	S)Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr)rr6r"beforeDoctypeSystemIdentifierStaterr%r7rr=rrr)rr$r r r!rs"






z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt	dddd	|jd
<|jj|j|j
|_nh|tkr|jjt	dddd	|jd
<|jj|j|j
|_n(|jjt	dddd	|jd
<|j|_dS)
Nrr,rrrjr"zunexpected-char-in-doctype)r#r$Frzeof-in-doctypeT)
rr6rrrrrr%r7rrrr)rr$r r r!r/s4












z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	dd
|jd<|jj|j|j|_nR|t	kr|jjtdddd
|jd<|jj|j|j|_n|jd|7<d
S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT)
rr6!afterDoctypeSystemIdentifierStaterr%r7rrrr)rr$r r r!rLs*








z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd	dd
|jd<|jj|j|j|_nR|t	kr|jjtdddd
|jd<|jj|j|j|_n|jd|7<d
S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT)
rr6rrr%r7rrrr)rr$r r r!rds*








z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|jj}|tkrn~|dkr4|jj|j|j|_n^|tkrt|jjt	dddd|jd<|jj|j|j|_n|jjt	ddd|j
|_dS)	Nrjr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT)rr6rr%r7rrrrrr)rr$r r r!r|s 





z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|jj}|dkr*|jj|j|j|_n,|tkrV|jj||jj|j|j|_ndS)NrjT)	rr6r%r7rrrrr=)rr$r r r!rs


zHTMLTokenizer.bogusDoctypeStatecCsg}x|j|jjd|j|jjd|jj}|tkr@Pq|dksLt|ddddkrx|ddd|d<Pq|j|qWdj|}|jd}|dkrx&t|D]}|j	jt
d	d
dqW|jdd}|r|j	jt
d
|d|j|_
dS)N]rjrz]]r,r[rr"zinvalid-codepoint)r#r$u�rJTrKrKrrK)r7rr^r6rAssertionErrorr9countranger%rrrr)rr$r6Z	nullCountrr r r!rs0



zHTMLTokenizer.cdataSectionState)N)NF)N__name__
__module____qualname____doc__rr)rBrSrTrZrr\rbr`rdrfrgr]rmrnrarsrtrcrwrxreryr{rzr}rrr~rrrrrrrrrrprrrrrrrrqrorlrrrrrrrrrrrrrrrrrrrrrrr
__classcell__r r )rr!rsH
P#

6 "-3rN)Z
__future__rrrZpip._vendor.sixrr;collectionsrZ	constantsrr	r
rrr
rrrrZ_inputstreamrZ_trierrLobjectrr r r r!<module>s