
ņIXi                 @   s&  d  d l  Z  d  d l Z d  d l Z d  d l m Z m Z d  d l m Z d  d l	 m
 Z
 d  d l m Z m Z y d  d l Z Wn e k
 r d Z Yn XGd d   d e j  Z e j   Z d d	 d
  Z d Z d d g Z d d d g Z e d e e e  d Z d d dO g Z d g Z e d e e e  d Z g  Z d d d g Z e d e e e  d Z g  Z d d d d d d d g Z e d  e e e d!  e d" e e e d#  d$ Z d g Z d d d d d% d& g Z e d' e e e  d( Z d g Z g  Z e d) e e e  d* Z d+ g Z d, g Z e d- e e e d. d/ d0 Z g  Z d1 g Z e d2 e e e d. d/ g  Z d1 g Z e d3 e e e d. d4 d5 Z g  Z d1 g Z e d6 e e e d. d/ d1 g Z g  Z e d7 e e e d. d4 d* Z d+ g Z d, g Z e d8 e e e d. d9 d: Z d; g Z d< g Z e d= e e e  d> Z d? g Z d; g Z e d@ e e e  dA Z dB g Z dC g Z e dD e e e  GdE dF   dF e  Z e j e dG  GdH dI   dI e j   Z GdJ dK   dK e j  Z dL dM   Z e dN k r"e j   d S)P    N)URLError	HTTPError)urlopen)support)BaseHTTPRequestHandler
HTTPServerc               @   sC   e  Z d  Z d d d d d d d  Z d d   Z d d   Z d S)RobotTestCaseNc             C   sz   t  | t  s d  St j j |   | r? d | | f |  _ n d | | f |  _ | |  _ | |  _ | |  _ | |  _	 d  S)NzRobotTest(%d, good, %s)zRobotTest(%d, bad, %s))

isinstanceintunittestTestCase__init__strparserurlgoodagent)selfindexr   r   r   r    r   N/afs/.cs.wisc.edu/s/python-3.5.2/amd64_ubu16/lib/python3.5/test_robotparser.pyr      s    			zRobotTestCase.__init__c             C   s~   t  |  j t  r$ |  j \ } } n |  j } |  j } |  j r^ |  j |  j j | |   n |  j |  j j | |   d  S)N)	r	   r   tupler   r   
assertTruer   	can_fetchassertFalse)r   r   r   r   r   r   runTest   s    			zRobotTestCase.runTestc             C   s   |  j  S)N)r   )r   r   r   r   __str__(   s    zRobotTestCase.__str__)__name__
__module____qualname__r   r   r   r   r   r   r   r      s   r   Ztest_robotparserc             C   s   t  j |  j   } t j j   } | j |  x- | D]% } t j t	 |  | | d |   q8 Wx- | D]% } t j t	 |  | | d |   qh Wd  S)N   r   )
ioStringIO	readlinesurllibrobotparserRobotFileParserparsetestsaddTestr   )r   Z
robots_txtZ	good_urlsZbad_urlsr   linesr   r   r   r   r   	RobotTest-   s    #r+   z
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
/z
/test.html/cyberworld/map/index.htmlz/tmp/xxxz	/foo.htmlr    z
# robots.txt for http://www.example.com/

User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:

cybermapper   z%
# go away
User-agent: *
Disallow: /
z/tmp/   zm
User-agent: figtree
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
z/tmpz	/tmp.htmlz/tmp/a.htmlz/a%3cd.htmlz/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.html   Zfigtree   zFigTree Robot libwww-perl/5.04zf
User-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
z	/a/b.htmlz/%7Ejoe/index.html   z
User-Agent: *
Disallow: /.
   zG
User-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
z/folder1/myfile.htmlz/folder1/anotherfile.html   r   Z	GooglebotzJ
User-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
z/something.jpg	   
   zGooglebot-MobilezJ
User-agent: Googlebot-Mobile
Allow: /

User-agent: Googlebot
Disallow: /
         Z	googlebotz/
User-agent: *
Disallow: /some/path?name=value
z
/some/pathz/some/path?name=value   zK
User-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
z/another/path   z;
User-agent: *
Allow: /some/path?
Disallow: /another/path?
z/some/path?z/another/path?   c               @   s(   e  Z d  Z d d   Z d d   Z d S)RobotHandlerc             C   s   |  j  d d  d  S)Ni  zForbidden access)Z
send_error)r   r   r   r   do_GET  s    zRobotHandler.do_GETc             G   s   d  S)Nr   )r   formatargsr   r   r   log_message  s    zRobotHandler.log_messageN)r   r   r   r?   rB   r   r   r   r   r>      s   r>   z threading required for this testc               @   sL   e  Z d  Z d d   Z d d   Z d d   Z d d   Z d	 d
   Z d S)PasswordProtectedSiteTestCasec             C   se   t  t j d f t  |  _ t j d d d |  j j d d d i  |  _ d |  j _	 |  j j
   d  S)	Nr   namezHTTPServer servingtargetkwargsZpoll_intervalg{Gz?T)r   r   HOSTr>   server	threadingThreadZserve_forevertdaemonstart)r   r   r   r   setUp  s    	z#PasswordProtectedSiteTestCase.setUpc             C   s+   |  j  j   |  j j   |  j  j   d  S)N)rH   shutdownrK   joinZserver_close)r   r   r   r   tearDown  s    z&PasswordProtectedSiteTestCase.tearDownc             C   s   |  j    d  S)N)testPasswordProtectedSite)r   r   r   r   r     s    z%PasswordProtectedSiteTestCase.runTestc             C   sx   |  j  j } d t j d t | d  } | d } t j j   } | j |  | j	   |  j
 | j d |   d  S)Nzhttp://:r    z/robots.txt*)rH   Zserver_addressr   rG   r   r$   r%   r&   Zset_urlreadr   r   )r   addrr   Z
robots_urlr   r   r   r   rR   !  s    

z7PasswordProtectedSiteTestCase.testPasswordProtectedSitec             C   s   d |  j  j S)Nz%s)	__class__r   )r   r   r   r   r   *  s    z%PasswordProtectedSiteTestCase.__str__N)r   r   r   rN   rQ   r   rR   r   r   r   r   r   rC   	  s
   	rC   c               @   s+   e  Z d  Z e j d  d d    Z d S)NetworkTestCasez7does not handle the gzip encoding delivered by pydotorgc             C   s]   t  j d  t  j d  : t j j d  } | j   |  j | j d d   Wd  QRXd  S)NZnetworkzwww.python.orgz http://www.python.org/robots.txtrT   )	r   ZrequiresZtransient_internetr$   r%   r&   rU   r   r   )r   r   r   r   r   testPythonOrg/  s    		
zNetworkTestCase.testPythonOrgN)r   r   r   r   skiprY   r   r   r   r   rX   -  s   rX   c             C   s0   t  j t  } | j t  | j t    | S)N)r   Z	makeSuiterX   r)   r(   rC   )loaderZsuitepatternr   r   r   
load_tests9  s    r]   __main__)r.   r-   )r!   r   Zurllib.robotparserr$   Zurllib.errorr   r   Zurllib.requestr   testr   Zhttp.serverr   r   rI   ImportErrorr   r   Z	TestSuiter(   r+   docr   Zbadr>   Z
skipUnlessrC   rX   r]   r   mainr   r   r   r   <module>   s   												
																#