
W4                 @   sd   d  Z  d d l Z d d l Z d g Z Gd d   d  Z Gd d   d  Z Gd d   d  Z d S)	a%   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
    NRobotFileParserc               @   s   e  Z d  Z d Z d d d  Z d d   Z d d   Z d	 d
   Z d d   Z d d   Z	 d d   Z
 d d   Z d d   Z d S)r   zs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

     c             C   s>   g  |  _  d  |  _ d |  _ d |  _ |  j |  d |  _ d  S)NFr   )entriesdefault_entrydisallow_all	allow_allset_urllast_checked)selfurl r   @/afs/.cs/s/python-3.5.2/amd64_ubu14/lib/python3.5/robotparser.py__init__   s    				zRobotFileParser.__init__c             C   s   |  j  S)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r	   )r
   r   r   r   mtime   s    zRobotFileParser.mtimec             C   s   d d l  } | j    |  _ d S)zYSets the time the robots.txt file was last fetched to the
        current time.

        r   N)timer	   )r
   r   r   r   r   modified(   s    zRobotFileParser.modifiedc             C   s5   | |  _  t j j |  d d  \ |  _ |  _ d S)z,Sets the URL referring to a robots.txt file.      N)r   urllibparseurlparsehostpath)r
   r   r   r   r   r   0   s    	zRobotFileParser.set_urlc             C   s   y t  j j |  j  } Wnm t  j j k
 r } zG | j d k rO d |  _ n' | j d k rv | j d k  rv d |  _ WYd d } ~ Xn) X| j	   } |  j
 | j d  j    d S)	z4Reads the robots.txt URL and feeds it to the parser.    Ti  i  Nzutf-8)r   r   )r   Zrequesturlopenr   error	HTTPErrorcoder   r   readr   decode
splitlines)r
   ferrrawr   r   r   r   5   s    zRobotFileParser.readc             C   s>   d | j  k r* |  j d  k r: | |  _ n |  j j |  d  S)N*)
useragentsr   r   append)r
   entryr   r   r   
_add_entryB   s    zRobotFileParser._add_entryc             C   s  d } t    } |  j   x| D]} | sr | d k rJ t    } d } n( | d k rr |  j |  t    } d } | j d  } | d k r | d |  } | j   } | s q  | j d d  } t |  d k r  | d j   j   | d <t j	 j
 | d j    | d <| d d k r_| d k rB|  j |  t    } | j j | d  d } q  | d d k r| d k r| j j t | d d	   d } q  | d d
 k r  | d k r  | j j t | d d   d } q  W| d k r|  j |  d S)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        r   r      #N:z
user-agentZdisallowFZallowT)Entryr   r)   findstripsplitlenlowerr   r   unquoter&   r'   	rulelinesRuleLine)r
   linesstater(   lineir   r   r   r   K   sL    
	
			 			
zRobotFileParser.parsec             C   s   |  j  r d S|  j r d S|  j s' d St j j t j j |   } t j j d d | j | j	 | j
 | j f  } t j j |  } | s d } x- |  j D]" } | j |  r | j |  Sq W|  j r |  j j |  Sd S)z=using the parsed robots.txt decide if useragent can fetch urlFTr   /)r   r   r	   r   r   r   r3   
urlunparser   paramsZqueryZfragmentquoter   
applies_to	allowancer   )r
   	useragentr   Z
parsed_urlr(   r   r   r   	can_fetch   s$    				zRobotFileParser.can_fetchc             C   s   d j  d d   |  j D  S)Nr   c             S   s    g  |  ] } t  |  d   q S)
)str).0r(   r   r   r   
<listcomp>   s   	 z+RobotFileParser.__str__.<locals>.<listcomp>)joinr   )r
   r   r   r   __str__   s    zRobotFileParser.__str__N)__name__
__module____qualname____doc__r   r   r   r   r   r)   r   rA   rG   r   r   r   r   r      s   		4c               @   s:   e  Z d  Z d Z d d   Z d d   Z d d   Z d S)	r5   zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c             C   sY   | d k r | r d } t  j j t  j j |   } t  j j |  |  _ | |  _ d  S)Nr   T)r   r   r;   r   r=   r   r?   )r
   r   r?   r   r   r   r      s
    zRuleLine.__init__c             C   s   |  j  d k p | j |  j   S)Nr%   )r   
startswith)r
   filenamer   r   r   r>      s    zRuleLine.applies_toc             C   s   |  j  r d n d d |  j S)NZAllowZDisallowz: )r?   r   )r
   r   r   r   rG      s    zRuleLine.__str__N)rH   rI   rJ   rK   r   r>   rG   r   r   r   r   r5      s   r5   c               @   sF   e  Z d  Z d Z d d   Z d d   Z d d   Z d d	   Z d
 S)r-   z?An entry has one or more user-agents and zero or more rulelinesc             C   s   g  |  _  g  |  _ d  S)N)r&   r4   )r
   r   r   r   r      s    	zEntry.__init__c             C   sj   g  } x' |  j  D] } | j d | d g  q Wx* |  j D] } | j t |  d g  q: Wd j |  S)NzUser-agent: rB   r   )r&   extendr4   rC   rF   )r
   retagentr8   r   r   r   rG      s    zEntry.__str__c             C   s]   | j  d  d j   } x= |  j D]2 } | d k r9 d S| j   } | | k r# d Sq# Wd S)z2check if this entry applies to the specified agentr:   r   r%   TF)r0   r2   r&   )r
   r@   rP   r   r   r   r>      s    zEntry.applies_toc             C   s.   x' |  j  D] } | j |  r
 | j Sq
 Wd S)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r4   r>   r?   )r
   rM   r8   r   r   r   r?      s    zEntry.allowanceN)rH   rI   rJ   rK   r   rG   r>   r?   r   r   r   r   r-      s
   r-   )rK   Zurllib.parser   Zurllib.request__all__r   r5   r-   r   r   r   r   <module>   s
   	