File "robotparser.cpython-38.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/lib64/python3.8/urllib/__pycache__/robotparser.cpython-38.pyc
File size: 7.16 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit

U

e5d$@s\dZddlZddlZddlZdgZeddZGdddZGdddZ	Gd	d
d
Z
dS)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
NRobotFileParserRequestRatezrequests secondsc@sreZdZdZdddZddZddZd	d
ZddZd
dZ	ddZ
ddZddZddZ
ddZddZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    cCs2g|_g|_d|_d|_d|_||d|_dS)NFr)entriessitemaps
default_entrydisallow_all	allow_allset_urllast_checkedselfurlr*/usr/lib64/python3.8/urllib/robotparser.py__init__s
zRobotFileParser.__init__cCs|jS)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )rr
rrrmtime%szRobotFileParser.mtimecCsddl}||_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)timer)r
rrrrmodified.szRobotFileParser.modifiedcCs&||_tj|dd\|_|_dS)z,Sets the URL referring to a robots.txt file.N)rurllibparseurlparseZhostpathrrrrr
6szRobotFileParser.set_urlc
Csztj|j}WnRtjjk
rd}z0|jdkr:d|_n|jdkrT|jdkrTd|_W5d}~XYnX|	}|
|ddS)z4Reads the robots.txt URL and feeds it to the parser.)iiTiiNzutf-8)
rZrequestZurlopenrerrorZ	HTTPErrorcoderr	readrdecode
splitlines)r
ferrrawrrrr;s
zRobotFileParser.readcCs,d|jkr|jdkr(||_n|j|dSN*)
useragentsrrappend)r
entryrrr
_add_entryHs

zRobotFileParser._add_entrycCsPd}t}||D]}|sP|dkr4t}d}n|dkrP||t}d}|d}|dkrn|d|}|}|s|q|dd}t|dkr|d|d<tj	
|d|d<|ddkr|dkr||t}|j|dd}q|ddkr.|dkr6|j
t|dd	d}q|dd
krb|dkr6|j
t|ddd}q|ddkr|dkr6|drt|d|_d}q|dd
kr|dkr6|dd}t|dkr|dr|drtt|dt|d|_d}q|ddkr|j|dq|dkrL||dS)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr#N:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rate/Zsitemap)Entryrr)findstripsplitlenlowerrrunquoter&r'	rulelinesRuleLineisdigitintdelayrreq_rater)r
linesstater(lineiZnumbersrrrrQsj








 
zRobotFileParser.parsecCs|jr
dS|jrdS|jsdStjtj|}tjdd|j|j	|j
|jf}tj|}|sfd}|j
D]}||rl||Sql|jr|j|SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTrr-)rr	rrrrr4
urlunparserZparamsZqueryZfragmentquoter
applies_to	allowancer)r
	useragentrZ
parsed_urlr(rrr	can_fetchs*

zRobotFileParser.can_fetchcCs>|sdS|jD]}||r|jSq|jr:|jjSdSN)rrrAr9rr
rCr(rrrcrawl_delays

zRobotFileParser.crawl_delaycCs>|sdS|jD]}||r|jSq|jr:|jjSdSrE)rrrAr:rrFrrrrequest_rates

zRobotFileParser.request_ratecCs|js
dS|jSrE)rrrrr	site_mapsszRobotFileParser.site_mapscCs,|j}|jdk	r||jg}dtt|S)Nz

)rrjoinmapstr)r
rrrr__str__s
zRobotFileParser.__str__N)r)__name__
__module____qualname____doc__rrrr
rr)rrDrGrHrIrMrrrrrs
		
	I

c@s(eZdZdZddZddZddZdS)	r6zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs<|dkr|sd}tjtj|}tj||_||_dS)NrT)rrr?rr@rrB)r
rrBrrrrs
zRuleLine.__init__cCs|jdkp||jSr$)r
startswith)r
filenamerrrrAszRuleLine.applies_tocCs|jr
dndd|jS)NZAllowZDisallowz: )rBrrrrrrMszRuleLine.__str__N)rNrOrPrQrrArMrrrrr6sr6c@s0eZdZdZddZddZddZdd	Zd
S)r.z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_d|_d|_dSrE)r&r5r9r:rrrrrszEntry.__init__cCsg}|jD]}|d|q
|jdk	r<|d|j|jdk	rf|j}|d|jd|j|tt|j	d
|S)NzUser-agent: z
Crawl-delay: zRequest-rate: r-
)r&r'r9r:ZrequestsZsecondsextendrKrLr5rJ)r
ZretagentZraterrrrMs


z
Entry.__str__cCsF|dd}|jD](}|dkr*dS|}||krdSqdS)z2check if this entry applies to the specified agentr-rr%TF)r1r3r&)r
rCrVrrrrAs
zEntry.applies_tocCs$|jD]}||r|jSqdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r5rArB)r
rSr=rrrrB
s

zEntry.allowanceN)rNrOrPrQrrMrArBrrrrr.s

r.)rQcollectionsZurllib.parserZurllib.request__all__
namedtuplerrr6r.rrrr<module>sB