File "robotparser.cpython-34.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/opt/alt/python34/lib64/python3.4/urllib/__pycache__/robotparser.cpython-34.pyc
File size: 6.6 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit


The file is the CPython 3.4 byte-compiled form of the standard-library module urllib/robotparser.py. The corresponding source, reconstructed from the docstrings and names embedded in the bytecode, is reproduced below.

""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
"""

import urllib.parse, urllib.request

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400 and err.code < 500:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            if self.default_entry is None:
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # state 0: start, 1: saw user-agent line, 2: saw allow/disallow line
        state = 0
        entry = Entry()

        self.modified()
        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # until robots.txt has been read, assume no url is allowable
        if not self.last_checked:
            return False
        # search for the given user agent; the first matching entry counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # fall back to the default ("*") entry; unknown agents are allowed
        if self.default_entry:
            return self.default_entry.allowance(url)
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])

class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""
    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path
r3sr3c@sFeZdZdZddZddZddZdd	Zd
S)r+z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS)N)r$r2)r
rrr
rs	zEntry.__init__cCsjg}x'|jD]}|jd|dgqWx*|jD]}|jt|dgq:Wdj|S)NzUser-agent: r?r)r$extendr2r@rC)r
Zretagentr6rrr
rDsz
Entry.__str__cCs]|jddj}x=|jD]2}|dkr9dS|j}||kr#dSq#WdS)z2check if this entry applies to the specified agentr8rr#TF)r.r0r$)r
r=rLrrr
r;szEntry.applies_tocCs.x'|jD]}|j|r
|jSq
WdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r2r;r<)r
rJr6rrr
r<szEntry.allowanceN)rErFrGrHrrDr;r<rrrr
r+s

r+)rHZurllib.parserZurllib.request__all__rr3r+rrrr
<module>s
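
A minimal usage sketch, not part of the module above: it assumes the standard-library import path urllib.robotparser, and the site URL, the agent name "MyCrawler", and the robots.txt rules are made-up examples for illustration.

# usage_sketch.py -- illustrative only; URL and rules are hypothetical
import urllib.robotparser

# Fetch and evaluate a live robots.txt (requires network access).
rp = urllib.robotparser.RobotFileParser()
rp.set_url("http://www.example.com/robots.txt")
rp.read()
print(rp.can_fetch("MyCrawler", "http://www.example.com/private/page.html"))

# parse() can also be fed lines directly, with no network fetch.
rp2 = urllib.robotparser.RobotFileParser()
rp2.parse([
    "User-agent: *",
    "Allow: /private/public.html",
    "Disallow: /private/",
])
print(rp2.can_fetch("MyCrawler", "/private/public.html"))  # True: first matching rule wins
print(rp2.can_fetch("MyCrawler", "/private/secret.html"))  # False: blocked by Disallow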