+
     Bi$                         R t ^ RIt^ RIt^ RIt^ RItR.t]P                  ! RR4      t ! R R4      t	 ! R R4      t
 ! R R	4      tR# )
a  robotparser.py

Copyright (C) 2000  Bastian Kleineidam

You can choose between two licenses when using this package:
1) GNU GPLv2
2) PSF license for Python 2.2

The robots.txt Exclusion Protocol is implemented as specified in
http://www.robotstxt.org/norobots-rfc.txt
NRobotFileParserRequestRatezrequests secondsc                   p   a  ] tR t^t o RtRR ltR tR tR tR t	R t
R tR	 tR
 tR tR tR tRtV tR# )r   zjThis class provides a set of methods to read, parse and answer
questions about a single robots.txt file.

c                |    . V n         . V n        R V n        RV n        RV n        V P                  V4       ^ V n        R # )NF)entriessitemapsdefault_entrydisallow_all	allow_allset_urllast_checkedselfurls   &&)/usr/lib/python3.14/urllib/robotparser.py__init__RobotFileParser.__init__   s;    !!S    c                    V P                   # )zReturns the time the robots.txt file was last fetched.

This is useful for long-running web spiders that need to
check for new robots.txt files periodically.

)r   r   s   &r   mtimeRobotFileParser.mtime&   s        r   c                8    ^ RI pVP                  4       V n        R# )zISets the time the robots.txt file was last fetched to the
current time.

N)timer   )r   r   s   & r   modifiedRobotFileParser.modified/   s    
 	 IIKr   c                x    Wn         \        P                  P                  V4      R,          w  V n        V n        R# )z,Sets the URL referring to a robots.txt file.:      NN)r   urllibparseurlparsehostpathr   s   &&r   r   RobotFileParser.set_url7   s)    %||44S9#>	49r   c                    \         P                  P                  V P                  4      pVP	                  4       pV P                  VP                  R4      P                  4       4       R#   \         P                  P                   d^   pTP                  R9   d	   RT n        M)TP                  R8  d   TP                  R8  d   RT n        TP                  4         Rp?R# Rp?ii ; i)z4Reads the robots.txt URL and feeds it to the parser.zutf-8Ti  i  N)i  i  )r   requesturlopenr   readr    decode
splitlineserror	HTTPErrorcoder	   r
   close)r   frawerrs   &   r   r(   RobotFileParser.read<   s    
	9&&txx0A &&(CJJszz'*5578 ||%% 	xx:%$(!SSXX^!%IIKK	s   )A+ +C'
AC""C'c                    R VP                   9   d   V P                  f	   Wn        R# R# V P                  P                  V4       R# )*N)
useragentsr   r   append)r   entrys   &&r   
_add_entryRobotFileParser._add_entryJ   s;    %"""!!)%*" * LL&r   c                
   ^ p\        4       pV P                  4        V EFE  pV'       g9   V^8X  d   \        4       p^ pM$V^8X  d   V P                  V4       \        4       p^ pVP                  R4      pV^ 8  d   VRV pVP	                  4       pV'       g   K{  VP                  R^4      p\        V4      ^8X  g   K  V^ ,          P	                  4       P                  4       V^ &   \        P                  P                  V^,          P	                  4       4      V^&   V^ ,          R8X  dJ   V^8X  d   V P                  V4       \        4       pVP                  P                  V^,          4       ^pEKU  V^ ,          R8X  d<   V^ 8w  d2   VP                  P                  \        V^,          R4      4       ^pEK  EK  V^ ,          R8X  d<   V^ 8w  d2   VP                  P                  \        V^,          R4      4       ^pEK  EK  V^ ,          R	8X  dR   V^ 8w  dH   V^,          P	                  4       P                  4       '       d   \!        V^,          4      Vn        ^pEKC  EKF  V^ ,          R
8X  d   V^ 8w  d   V^,          P                  R4      p\        V4      ^8X  d   V^ ,          P	                  4       P                  4       '       d]   V^,          P	                  4       P                  4       '       d2   \%        \!        V^ ,          4      \!        V^,          4      4      Vn        ^pEK  EK  V^ ,          R8X  g   EK#  V P(                  P                  V^,          4       EKH  	  V^8X  d   V P                  V4       R# R# )z|Parse the input lines from a robots.txt file.

We allow that a user-agent: line is not preceded by
one or more blank lines.
#N:z
user-agentdisallowFallowTzcrawl-delayzrequest-rate/sitemap)Entryr   r8   findstripsplitlenlowerr   r    unquoter5   r6   	rulelinesRuleLineisdigitintdelayr   req_rater   )r   linesstater7   lineinumberss   &&     r   r    RobotFileParser.parseS   s    DA:!GEEaZOOE*!GEE		#AAvBQx::<D::c1%D4yA~q'--///1Q ,,..tAw}}?Q7l*z. %$$++DG4E!W
*z..xQ/GH ! " !W'z..xQ/FG ! " !W-z  7==?2244*-d1g,EK ! " !W.z"&q'--"4LA-'!*2B2B2D2L2L2N2N '
 0 0 2 : : < <-8WQZ#gVWj/-ZEN ! " !W	)
 MM((a1o p A:OOE" r   c                   V P                   '       d   R# V P                  '       d   R# V P                  '       g   R# \        P                  P                  \        P                  P                  V4      4      p\        P                  P                  RRVP                  VP                  VP                  VP                  34      p\        P                  P                  V4      pV'       g   RpV P                   F-  pVP                  V4      '       g   K  VP                  V4      u # 	  V P                   '       d   V P                   P                  V4      # R# )z=using the parsed robots.txt decide if useragent can fetch urlFT r?   )r	   r
   r   r   r    r!   rG   
urlunparser#   paramsqueryfragmentquoter   
applies_to	allowancer   )r   	useragentr   
parsed_urlr7   s   &&&  r   	can_fetchRobotFileParser.can_fetch   s   >>>
     \\**6<<+?+?+DE
ll%%r"Z__j..
0C0C'E Fll  %C\\E	**s++ " %%//44r   c                    V P                  4       '       g   R # V P                   F(  pVP                  V4      '       g   K  VP                  u # 	  V P                  '       d   V P                  P                  # R # N)r   r   r[   rL   r   r   r]   r7   s   && r   crawl_delayRobotFileParser.crawl_delay   s\    zz||\\E	**{{" " %%+++r   c                    V P                  4       '       g   R # V P                   F(  pVP                  V4      '       g   K  VP                  u # 	  V P                  '       d   V P                  P                  # R # rb   )r   r   r[   rM   r   rc   s   && r   request_rateRobotFileParser.request_rate   s\    zz||\\E	**~~% " %%...r   c                B    V P                   '       g   R # V P                   # rb   )r   r   s   &r   	site_mapsRobotFileParser.site_maps   s    }}}}}r   c                    V P                   pV P                  e   WP                  .,           pRP                  \        \        V4      4      # )Nz

)r   r   joinmapstr)r   r   s   & r   __str__RobotFileParser.__str__   s>    ,,)!3!3 44G{{3sG,--r   )	r
   r   r	   r   r"   r   r#   r   r   N)rU   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   r(   r8   r    r_   rd   rg   rj   rp   __static_attributes____classdictcell____classdict__s   @r   r   r      sM     
!(?
9'G#R:
. .r   c                   6   a  ] tR t^t o RtR tR tR tRtV t	R# )rI   zhA rule line is a single "Allow:" (allowance==True) or "Disallow:"
(allowance==False) followed by a path.c                    VR 8X  d   V'       g   Rp\         P                  P                  \         P                  P                  V4      4      p\         P                  P	                  V4      V n        W n        R# )rU   TN)r   r    rV   r!   rZ   r#   r\   )r   r#   r\   s   &&&r   r   RuleLine.__init__   sN    2:iI||&&v||'<'<T'BCLL&&t,	"r   c                f    V P                   R 8H  ;'       g    VP                  V P                   4      # )r4   )r#   
startswith)r   filenames   &&r   r[   RuleLine.applies_to   s(    yyCAA8#6#6tyy#AAr   c                ^    V P                   '       d   R MRR,           V P                  ,           # )AllowDisallowz: r\   r#   r   s   &r   rp   RuleLine.__str__   s     >>>zTADIIMMr   r   N)
rr   rs   rt   ru   rv   r   r[   rp   rw   rx   ry   s   @r   rI   rI      s!     1#BN Nr   rI   c                   <   a  ] tR t^t o RtR tR tR tR tRt	V t
R# )rA   z?An entry has one or more user-agents and zero or more rulelinesc                >    . V n         . V n        R V n        R V n        R # rb   )r5   rH   rL   rM   r   s   &r   r   Entry.__init__   s    
r   c                   . pV P                    F  pVP                  R V 24       K  	  V P                  e   VP                  RV P                   24       V P                  e8   V P                  pVP                  RVP                   RVP
                   24       VP                  \        \        V P                  4      4       RP                  V4      # )zUser-agent: zCrawl-delay: zRequest-rate: r?   
)r5   r6   rL   rM   requestssecondsextendrn   ro   rH   rm   )r   retagentrates   &   r   rp   Entry.__str__   s    __EJJeW-. %::!JJtzzl34==$==DJJa~FG

3sDNN+,yy~r   c                    VP                  R4      ^ ,          P                  4       pV P                   F&  pVR8X  d    R# VP                  4       pW!9   g   K%   R# 	  R# )z2check if this entry applies to the specified agentr?   r4   TF)rD   rF   r5   )r   r]   r   s   && r   r[   Entry.applies_to   sQ     OOC(+113	__E|KKME! % r   c                v    V P                    F(  pVP                  V4      '       g   K  VP                  u # 	  R# )zJPreconditions:
- our agent applies to this entry
- filename is URL decodedT)rH   r[   r\   )r   r   rP   s   && r   r\   Entry.allowance  s0     NNDx((~~% # r   )rL   rM   rH   r5   N)rr   rs   rt   ru   rv   r   rp   r[   r\   rw   rx   ry   s   @r   rA   rA      s!     I
 r   rA   )rv   collectionsurllib.errorr   urllib.parseurllib.request__all__
namedtupler   r   rI   rA    r   r   <module>r      sV   
    
$$]4FG. .DN N$( (r   