File "robotparser.cpython-39.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/opt/alt/python39/lib64/python3.9/urllib/__pycache__/robotparser.cpython-39.pyc
File size: 7.17 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit

a

XC?h$@s\dZddlZddlZddlZdgZeddZGdddZGdddZ	Gd	d
d
Z
dS)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
NRobotFileParserRequestRatezrequests secondsc@sreZdZdZdddZddZddZd	d
ZddZd
dZ	ddZ
ddZddZddZ
ddZddZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    cCs2g|_g|_d|_d|_d|_||d|_dS)NFr)entriessitemaps
default_entrydisallow_all	allow_allset_urllast_checkedselfurlr7/opt/alt/python39/lib64/python3.9/urllib/robotparser.py__init__s
zRobotFileParser.__init__cCs|jS)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )rr
rrrmtime%szRobotFileParser.mtimecCsddl}||_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)timer)r
rrrrmodified.szRobotFileParser.modifiedcCs&||_tj|dd\|_|_dS)z,Sets the URL referring to a robots.txt file.N)rurllibparseurlparseZhostpathrrrrr
6szRobotFileParser.set_urlc
Csztj|j}WnTtjjyf}z8|jdvr8d|_n|jdkrR|jdkrRd|_WYd}~n&d}~00|	}|
|ddS)z4Reads the robots.txt URL and feeds it to the parser.)iiTiiNzutf-8)
rZrequestZurlopenrerrorZ	HTTPErrorcoderr	readrdecode
splitlines)r
ferrrawrrrr;s
zRobotFileParser.readcCs,d|jvr|jdur(||_n|j|dSN*)
useragentsrrappend)r
entryrrr
_add_entryHs

zRobotFileParser._add_entrycCsPd}t}||D]}|sP|dkr4t}d}n|dkrP||t}d}|d}|dkrn|d|}|}|s|q|dd}t|dkr|d|d<tj	
|d|d<|ddkr|dkr||t}|j|dd}q|ddkr.|dkr6|j
t|dd	d}q|dd
krb|dkr6|j
t|ddd}q|ddkr|dkr6|drt|d|_d}q|dd
kr|dkr6|dd}t|dkr|dr|drtt|dt|d|_d}q|ddkr|j|dq|dkrL||dS)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr#N:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rate/Zsitemap)Entryrr)findstripsplitlenlowerrrunquoter&r'	rulelinesRuleLineisdigitintdelayrreq_rater)r
linesstater(lineiZnumbersrrrrQsj








 
zRobotFileParser.parsecCs|jr
dS|jrdS|jsdStjtj|}tjdd|j|j	|j
|jf}tj|}|sfd}|j
D]}||rl||Sql|jr|j|SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTrr-)rr	rrrrr4
urlunparserparamsZqueryZfragmentquoter
applies_to	allowancer)r
	useragentrZ
parsed_urlr(rrr	can_fetchs&

zRobotFileParser.can_fetchcCs>|sdS|jD]}||r|jSq|jr:|jjSdSN)rrrBr9rr
rDr(rrrcrawl_delays

zRobotFileParser.crawl_delaycCs>|sdS|jD]}||r|jSq|jr:|jjSdSrF)rrrBr:rrGrrrrequest_rates

zRobotFileParser.request_ratecCs|js
dS|jSrF)rrrrr	site_mapsszRobotFileParser.site_mapscCs,|j}|jdur||jg}dtt|S)Nz

)rrjoinmapstr)r
rrrr__str__s
zRobotFileParser.__str__N)r)__name__
__module____qualname____doc__rrrr
rr)rrErHrIrJrNrrrrrs
		
	I

c@s(eZdZdZddZddZddZdS)	r6zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs<|dkr|sd}tjtj|}tj||_||_dS)NrT)rrr?rrArrC)r
rrCrrrrs
zRuleLine.__init__cCs|jdkp||jSr$)r
startswith)r
filenamerrrrBszRuleLine.applies_tocCs|jr
dndd|jS)NZAllowZDisallowz: )rCrrrrrrNszRuleLine.__str__N)rOrPrQrRrrBrNrrrrr6sr6c@s0eZdZdZddZddZddZdd	Zd
S)r.z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_d|_d|_dSrF)r&r5r9r:rrrrrszEntry.__init__cCsg}|jD]}|d|q
|jdur<|d|j|jdurf|j}|d|jd|j|tt|j	d
|S)NzUser-agent: z
Crawl-delay: zRequest-rate: r-
)r&r'r9r:ZrequestsZsecondsextendrLrMr5rK)r
ZretagentZraterrrrNs


z
Entry.__str__cCsF|dd}|jD](}|dkr*dS|}||vrdSqdS)z2check if this entry applies to the specified agentr-rr%TF)r1r3r&)r
rDrWrrrrBs
zEntry.applies_tocCs$|jD]}||r|jSqdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r5rBrC)r
rTr=rrrrC
s

zEntry.allowanceN)rOrPrQrRrrNrBrCrrrrr.s

r.)rRcollectionsZurllib.parserZurllib.request__all__
namedtuplerrr6r.rrrr<module>sB