示例#1
0
    def test_unbounded_response(self):
        # Completeness of responses without Content-Length or Transfer-Encoding
        # can not be determined, we treat them as valid but flagged as "partial"
        from urllib import urlencode
        query = urlencode({
            'raw':
            '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''
        })
        spider = SimpleSpider("http://localhost:8998/raw?{0}".format(query))
        yield docrawl(spider)
        log = get_testlog()
        self.assertEqual(log.count("Got response 200"), 1)
示例#2
0
    def test_unbounded_response(self):
        # Completeness of responses without Content-Length or Transfer-Encoding
        # can not be determined, we treat them as valid but flagged as "partial"
        from urllib import urlencode
        query = urlencode({'raw': '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''})
        spider = SimpleSpider("http://localhost:8998/raw?{0}".format(query))
        yield docrawl(spider)
        log = get_testlog()
        self.assertEqual(log.count("Got response 200"), 1)
    def test_logging(self):
        spider = _HttpErrorSpider(bypass_status_codes={402})
        yield docrawl(spider)
        # print(get_testlog())
        self.assertEqual(spider.parsed, {'200', '402'})
        self.assertEqual(spider.skipped, {'402'})
        self.assertEqual(spider.failed, {'404', '500'})

        log = get_testlog()
        self.assertIn('Ignoring response <404', log)
        self.assertIn('Ignoring response <500', log)
        self.assertNotIn('Ignoring response <200', log)
        self.assertNotIn('Ignoring response <402', log)
    def test_logging(self):
        crawler = get_crawler(_HttpErrorSpider)
        yield crawler.crawl(bypass_status_codes={402})
        # print(get_testlog())
        self.assertEqual(crawler.spider.parsed, {'200', '402'})
        self.assertEqual(crawler.spider.skipped, {'402'})
        self.assertEqual(crawler.spider.failed, {'404', '500'})

        log = get_testlog()
        self.assertIn('Ignoring response <404', log)
        self.assertIn('Ignoring response <500', log)
        self.assertNotIn('Ignoring response <200', log)
        self.assertNotIn('Ignoring response <402', log)
示例#5
0
 def _assert_retried(self):
     log = get_testlog()
     self.assertEqual(log.count("Retrying"), 2)
     self.assertEqual(log.count("Gave up retrying"), 1)
示例#6
0
 def _assert_retried(self):
     log = get_testlog()
     self.assertEqual(log.count("Retrying"), 2)
     self.assertEqual(log.count("Gave up retrying"), 1)
示例#7
0
 def _assert_got_tunnel_error(self):
     log = get_testlog()
     self.assertEqual(log.count('TunnelError'), 1)
示例#8
0
 def _assert_got_response_code(self, code):
     log = get_testlog()
     self.assertEqual(log.count('Crawled (%d)' % code), 1)
示例#9
0
 def _assert_got_tunnel_error(self):
     log = get_testlog()
     self.assertEqual(log.count('TunnelError'), 1)
示例#10
0
 def _assert_got_response_code(self, code):
     log = get_testlog()
     self.assertEqual(log.count('Crawled (%d)' % code), 1)