� h f�S�@sldZddlZddlZddlZddlmZdgZejd�Zejd�Z ejd�Z ejd�Z ejd �Z ejd �Z ejd �Zejd �Zejd �Zejd�Zejd�Zejdej�Zejdej�Zejd �Zejd�ZGdd�de�Ze�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape� HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]�>z--\s*>z(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*z$([a-zA-Z][^ />]*)(?:\s|/(?!>))*zJ\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*a� <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:\s+ # whitespace before attribute name (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name (?:\s*=\s* # value indicator (?:'[^']*' # LITA-enclosed value |\"[^\"]*\" # LIT-enclosed value |[^'\">\s]+ # bare value ) )? ) )* \s* # trailing whitespace aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s1eZdZdZddd�Zdd�ZdS)�HTMLParseErrorz&Exception raised for all parse errors.NcCs3|s t�||_|d|_|d|_dS)Nr�)�AssertionError�msg�lineno�offset)�selfrZposition�r �0/opt/alt/python34/lib64/python3.4/html/parser.py�__init__Us   zHTMLParseError.__init__cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S)Nz , at line %dz , column %dr)rr r )r �resultr r r �__str__[s  zHTMLParseError.__str__)NN)�__name__� __module__� __qualname__�__doc__rrr r r r rRs rc@sfeZdZdZd;Zededd�Zdd�Zd d �Zd d �Z d d�Z dZ dd�Z dd�Z dd�Zdd�Zdd�Zddd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Zd7d8�Zd9d:�Z dS)<raEFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). If convert_charrefs is True the character references are converted automatically to the corresponding Unicode character (and self.handle_data() is no longer split in chunks), otherwise they are passed by calling self.handle_entityref() or self.handle_charref() with the string containing respectively the named or numeric reference as the argument. �script�style�convert_charrefscCsv|tk r%tjdtdd�nd}||_|tkr_d}tjdtdd�n||_|j�dS)a�Initialize and reset this instance. If convert_charrefs is True (default: False), all character references are automatically converted to the corresponding Unicode characters. If strict is set to False (the default) the parser will parse invalid markup, otherwise it will raise an error. Note that the strict mode and argument are deprecated. z,The strict argument and mode are deprecated.� stacklevel�FzfThe value of convert_charrefs will become True in 3.5. You are encouraged to set the value explicitly.N)�_default_sentinel�warnings�warn�DeprecationWarning�strictr�reset)r rrr r r r~s      zHTMLParser.__init__cCs8d|_d|_t|_d|_tjj|�dS)z1Reset this instance. Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal� interesting� cdata_elem� _markupbase� ParserBaser)r r r r r�s     zHTMLParser.resetcCs!|j||_|jd�dS)z�Feed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)r!�goahead)r �datar r r �feed�szHTMLParser.feedcCs|jd�dS)zHandle any buffered data.rN)r()r r r r �close�szHTMLParser.closecCs/tjdtdd�t||j���dS)Nz!The 'error' method is deprecated.rr)rrrr�getpos)r �messager r r �error�s  zHTMLParser.errorNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)r r r r �get_starttag_text�szHTMLParser.get_starttag_textcCs2|j�|_tjd|jtj�|_dS)Nz </\s*%s\s*>)�lowerr%�re�compile�Ir$)r �elemr r r �set_cdata_mode�szHTMLParser.set_cdata_modecCst|_d|_dS)N)r#r$r%)r r r r �clear_cdata_mode�s zHTMLParser.clear_cdata_modec Cs5|j}d}t|�}x�||kr�|jr�|j r�|jd|�}|dkr�|jdt||d��}|dkr�tjd�j ||� r�Pn|}q�n=|j j ||�}|r�|j �}n|jr�Pn|}||krH|jr.|j r.|j t |||���qH|j |||��n|j||�}||krjPn|j}|d|�r_tj||�r�|j|�} n�|d|�r�|j|�} n�|d|�r�|j|�} n�|d|�r |j|�} ng|d |�rE|jr3|j|�} qp|j|�} n+|d |kro|j d�|d } nP| dkrJ|s�Pn|jr�|jd �n|jd |d �} | dkr�|jd|d �} | dkr�|d } q�n | d 7} |jr0|j r0|j t ||| ���qJ|j ||| ��n|j|| �}q|d |�r;tj||�}|r�|j�dd�} |j| �|j�} |d| d �s�| d } n|j|| �}qq�d||d�kr7|j |||d��|j||d�}nPq|d|�r�tj||�}|r�|jd �} |j| �|j�} |d| d �s�| d } n|j|| �}qnt j||�}|rS|rO|j�||d�krO|jr|jd�qO|j�} | |kr6|} n|j||d �}nPq�|d |kr�|j d�|j||d �}q�Pqdst!d��qW|r||kr|j r|jr�|j r�|j t |||���n|j |||��|j||�}n||d�|_dS)Nr�<�&�"z[\s;]z</z<!--z<?z<!rzEOF in middle of constructrz&#r�;z#EOF in middle of entity or char refzinteresting.search() lied���)"r!�lenrr%�find�rfind�maxr2r3�searchr$�start� handle_datar� updatepos� startswith� starttagopen�match�parse_starttag� parse_endtag� parse_comment�parse_pirZparse_declaration�parse_html_declarationr.�charref�group�handle_charref�end� entityref�handle_entityref� incompleter) r rPr!�i�n�jZampposrGrE�k�namer r r r(�s�                         "      zHTMLParser.goaheadcCs�|j}|||d�dks/td��|||d�dkrV|j|�S|||d�dkr}|j|�S|||d�j�d kr�|jd |d�}|d kr�d S|j||d|��|d S|j|�SdS)Nrz<!z+unexpected call to parse_html_declaration()�z<!--�z<![� z <!doctyperrr<r<)r!rrJZparse_marked_sectionr1r>� handle_decl�parse_bogus_comment)r rTr!�gtposr r r rLCs &    z!HTMLParser.parse_html_declarationrcCs�|j}|||d�dks/td��|jd|d�}|dkrUd S|ry|j||d|��n|dS) Nr�<!�</z"unexpected call to parse_comment()rr)r_r`r<r<)r!rr>�handle_comment)r rTZreportr!�posr r r r]Xs & zHTMLParser.parse_bogus_commentcCs�|j}|||d�dks/td��tj||d�}|sOdS|j�}|j||d|��|j�}|S)Nrz<?zunexpected call to parse_pi()rr<)r!r�picloserArB� handle_pirP)r rTr!rGrVr r r rKds &  zHTMLParser.parse_picCsd|_|j|�}|dkr(|S|j}|||�|_g}|jrltj||d�}ntj||d�}|s�td��|j�}|j d�j �|_ }x$||kr�|jr�t j||�}nt j||�}|sPn|j ddd�\} } } | s2d} ns| dd�dko]| d d�kns�| dd�dko�| dd�knr�| dd�} n| r�t| �} n|j| j �| f�|j�}q�W|||�j�} | dkr�|j�\} }d |jkr^| |jjd �} t|j�|jjd �}n|t|j�}|jr�|jd |||�dd �f�n|j|||��|S| jd �r�|j||�n/|j||�||jkr|j|�n|S)Nrrz#unexpected call to parse_starttag()rrZ�'�"r�/>� z junk characters in start tag: %r�r<r<r<)rrg)r/�check_for_whole_start_tagr!r�tagfindrG�tagfind_tolerantrrPrNr1r"�attrfind�attrfind_tolerantr�append�stripr,�countr=r?r.rC�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr6)r rT�endposr!�attrsrGrW�tag�mZattrname�restZ attrvaluerPr r r r r rHps`       00    "zHTMLParser.parse_starttagcCsk|j}|jr'tj||�}ntj||�}|r[|j�}|||d�}|dkrs|dS|dkr�|jd|�r�|dS|jd|�r�d S|jr�|j||d�|jd�n||kr�|S|dSn|dkrd S|dkrd S|jr@|j||�|jd �n||krP|S|dSnt d ��dS)Nrr�/z/>rzmalformed empty start tagr z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzmalformed start tagzwe should not get here!r<r<r<) r!r�locatestarttagendrG�locatestarttagend_tolerantrPrErDr.r)r rTr!ryrV�nextr r r rj�s>             z$HTMLParser.check_for_whole_start_tagcCs�|j}|||d�dks/td��tj||d�}|sOd S|j�}tj||�}|sW|jdk r�|j|||��|S|j r�|j d|||�f�nt j||d�}|s|||d�dkr|dS|j |�Sn|j d�j�}|jd|j��}|j|�|dS|j d�j�}|jdk r�||jkr�|j|||��|Sn|j|j��|j�|S) Nrz</zunexpected call to parse_endtagrzbad end tag: %rrZz</>rr<)r!r� endendtagrArP� endtagfindrGr%rCrr.rlr]rNr1r>� handle_endtagr7)r rTr!rGr^Z namematchZtagnamer5r r r rI�s< &  !  zHTMLParser.parse_endtagcCs!|j||�|j|�dS)N)rtr�)r rxrwr r r rsszHTMLParser.handle_startendtagcCsdS)Nr )r rxrwr r r rtszHTMLParser.handle_starttagcCsdS)Nr )r rxr r r r� szHTMLParser.handle_endtagcCsdS)Nr )r rXr r r rOszHTMLParser.handle_charrefcCsdS)Nr )r rXr r r rRszHTMLParser.handle_entityrefcCsdS)Nr )r r)r r r rCszHTMLParser.handle_datacCsdS)Nr )r r)r r r raszHTMLParser.handle_commentcCsdS)Nr )r Zdeclr r r r\szHTMLParser.handle_declcCsdS)Nr )r r)r r r rd"szHTMLParser.handle_picCs$|jr |jd|f�ndS)Nzunknown declaration: %r)rr.)r r)r r r � unknown_decl%s zHTMLParser.unknown_declcCs tjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.rr)rrrr)r �sr r r r*s  zHTMLParser.unescape)rr)!rrrrrurrrr*r+r.r/r0r6r7r(rLr]rKrHrjrIrsrtr�rOrRrCrar\rdr�rr r r r rfs<        �  < + *          )rr2rr&Zhtmlr�__all__r3r#rSrQrMrFrcZ commentcloserkrlrmrn�VERBOSEr|r}rr�� Exceptionr�objectrr'rr r r r �<module>s6