File "robotparser.cpython-33.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/opt/alt/python33/lib64/python3.3/urllib/__pycache__/robotparser.cpython-33.pyc
File size: 9.17 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit


Decompiled source (Lib/urllib/robotparser.py, CPython 3.3):

""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""

import urllib.parse, urllib.request

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the first default ("*") entry wins; it is consulted last
            if self.default_entry is None:
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states: 0 = start, 1 = saw user-agent line, 2 = saw allow/disallow
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches; the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
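
What follows is a minimal usage sketch, not part of the recovered file: it feeds
an inline robots.txt to parse() and queries can_fetch(). The agent names
("figtree", "OtherBot") and the example.com URLs are illustrative placeholders.

# Usage sketch (illustrative agent names and URLs; not part of the module).
from urllib.robotparser import RobotFileParser

rp = RobotFileParser()
rp.parse([
    "User-agent: figtree",
    "Disallow: /tmp",
    "",                        # blank line ends the figtree entry
    "User-agent: *",
    "Disallow: /private/",
])

print(rp.can_fetch("figtree/1.0", "http://example.com/tmp/x"))    # False: /tmp rule
print(rp.can_fetch("OtherBot", "http://example.com/tmp/x"))       # True: no matching rule
print(rp.can_fetch("OtherBot", "http://example.com/private/x"))   # False: default "*" entry

parse() is convenient for offline testing; against a live site you would instead
call set_url() with the robots.txt URL and then read(), which fetches the file
and treats HTTP 401/403 as "disallow all" and other 4xx codes as "allow all".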