B YI~d9E�@s�ddlZddlZddlZddlmZdgZe�d�Ze�d�Ze�d�Z e�d�Z e�d�Z e�d �Z e�d �Z e�d �Ze�d �Ze�d ej�Ze�d �Ze�d�ZGdd�dej�ZdS)�N)�unescape� HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdd�dd�Zdd�Zdd �Zd d �Zd Zd d�Z dd�Z dd�Z dd�Z dd�Z d8dd�Zdd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd S)9r)ZscriptZstyleT)�convert_charrefscCs||_|��dS)N)r�reset)�selfr�r�0/opt/alt/python37/lib64/python3.7/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)N�z???)�rawdata�lasttag�interesting_normal� interesting� cdata_elem� _markupbase� ParserBaser)rrrr r`s zHTMLParser.resetcCs|j||_|�d�dS)Nr)r �goahead)r�datarrr �feedhs zHTMLParser.feedcCs|�d�dS)N�)r)rrrr �closeqszHTMLParser.closeNcCs|jS)N)�_HTMLParser__starttag_text)rrrr �get_starttag_textwszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz </\s*%s\s*>)�lowerr�re�compile�Ir)r�elemrrr �set_cdata_mode{s zHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)rrrr �clear_cdata_modeszHTMLParser.clear_cdata_modec Cs@|j}d}t|�}�x�||k�r�|jrx|jsx|�d|�}|dkr�|�dt||d��}|dkrrt�d�� ||�srP|}n(|j � ||�}|r�|� �}n |jr�P|}||kr�|jr�|js�|� t |||���n|� |||��|�||�}||kr�P|j}|d|��rDt�||��r |�|�} n�|d|��r8|�|�} nl|d|��rP|�|�} nT|d|��rh|�|�} n<|d |��r�|�|�} n$|d |k�r�|� d�|d } nP| dk�r6|�s�P|�d |d �} | dk�r�|�d|d �} | dk�r�|d } n| d 7} |j�r$|j�s$|� t ||| ���n|� ||| ��|�|| �}q|d |��r�t�||�}|�r�|��d d�} |�| �|��} |d| d ��s�| d } |�|| �}qn:d||d�k�r�|� |||d ��|�||d �}Pq|d|�rt�||�}|�rF|�d �} |�| �|��} |d| d ��s8| d } |�|| �}qt�||�}|�r�|�r�|��||d�k�r�|��} | |k�r�|} |�||d �}Pn,|d |k�r�|� d�|�||d �}nPqqW|�r.||k�r.|j�s.|j�r|j�s|� t |||���n|� |||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]z</z<!--z<?z<!rrz&#�����;)r �lenrr�find�rfind�maxrr�searchr�start� handle_datarZ updatepos� startswith� starttagopen�match�parse_starttag� parse_endtag� parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end� entityref�handle_entityref� incomplete) rr9r �i�n�jZampposr0r.�k�namerrr r�s�                                  zHTMLParser.goaheadcCs�|j}|||d�dkr$|�|�S|||d�dkrB|�|�S|||d���dkr�|�d|d�}|dkrvdS|�||d |��|d S|�|�SdS) N�z<!--�z<![� z <!doctyperr%r$r)r r3Zparse_marked_sectionrr(� handle_decl�parse_bogus_comment)rr=r �gtposrrr r5s  z!HTMLParser.parse_html_declarationrcCsD|j}|�d|d�}|dkr"dS|r<|�||d|��|dS)Nrr$r%r)r r(�handle_comment)rr=Zreportr �posrrr rFszHTMLParser.parse_bogus_commentcCsH|j}t�||d�}|sdS|��}|�||d|��|��}|S)Nr$r%)r �picloser+r,� handle_pir9)rr=r r0r?rrr r4!szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|��}|�d���|_}x�||k�r t �||�}|s~P|�ddd�\} } } | s�d} nZ| dd�dkr�| dd�ks�n| dd�dkr�| dd�kr�nn | dd�} | �rt | �} |� | ��| f�|��}qbW|||�� �} | dk�r�|� �\} }d |jk�r|| |j�d �} t|j�|j�d �}n|t|j�}|�|||��|S| �d ��r�|�||�n"|�||�||jk�r�|�|�|S) Nrrr$rC�'r%�")rz/>� z/>)r�check_for_whole_start_tagr �tagfind_tolerantr0r9r7rr �attrfind_tolerantr�append�stripZgetpos�countr'r)r-�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr)rr=�endposr �attrsr0r@�tag�mZattrname�restZ attrvaluer9�lineno�offsetrrr r1-sP   &(         zHTMLParser.parse_starttagcCs�|j}t�||�}|r�|��}|||d�}|dkr>|dS|dkr~|�d|�rZ|dS|�d|�rjdS||krv|S|dS|dkr�dS|dkr�dS||kr�|S|dStd ��dS) Nrr�/z/>r$r%r z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r �locatestarttagend_tolerantr0r9r.�AssertionError)rr=r r\r?�nextrrr rO`s.   z$HTMLParser.check_for_whole_start_tagcCs|j}t�||d�}|sdS|��}t�||�}|s�|jdk rV|�|||��|St�||d�}|s�|||d�dkr�|dS|� |�S|� d�� �}|� d|���}|� |�|dS|� d�� �}|jdk r�||jkr�|�|||��|S|� |�|��|S)Nrr%r$rCz</>r)r � endendtagr+r9� endtagfindr0rr-rPrFr7rr(� handle_endtagr )rr=r r0rGZ namematchZtagnamerrrr r2�s6       zHTMLParser.parse_endtagcCs|�||�|�|�dS)N)rWrf)rr[rZrrr rV�s zHTMLParser.handle_startendtagcCsdS)Nr)rr[rZrrr rW�szHTMLParser.handle_starttagcCsdS)Nr)rr[rrr rf�szHTMLParser.handle_endtagcCsdS)Nr)rrArrr r8�szHTMLParser.handle_charrefcCsdS)Nr)rrArrr r;�szHTMLParser.handle_entityrefcCsdS)Nr)rrrrr r-�szHTMLParser.handle_datacCsdS)Nr)rrrrr rH�szHTMLParser.handle_commentcCsdS)Nr)rZdeclrrr rE�szHTMLParser.handle_declcCsdS)Nr)rrrrr rK�szHTMLParser.handle_picCsdS)Nr)rrrrr � unknown_decl�szHTMLParser.unknown_declcCstjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r$)� stacklevel)�warnings�warn�DeprecationWarningr)r�srrr r�s zHTMLParser.unescape)r)�__name__� __module__� __qualname__rXr rrrrrrr rr5rFr4r1rOr2rVrWrfr8r;r-rHrErKrgrrrrr r?s6  z  3"()rrirZhtmlr�__all__rrr<r:r6r/rJZ commentcloserPrQ�VERBOSErardrerrrrrr �<module> s&