def test_unbounded_response(self):
    # Completeness of responses without Content-Length or Transfer-Encoding
    # can not be determined, we treat them as valid but flagged as "partial".
    try:
        # Python 3 keeps urlencode in urllib.parse.
        from urllib.parse import urlencode
    except ImportError:
        # Python 2 location, which is what this test originally imported.
        from urllib import urlencode

    # Canned HTTP reply: status line, headers, a blank separator line and a
    # two-line body. It deliberately carries neither a Content-Length nor a
    # Transfer-Encoding header.
    raw_response = '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''
    query = urlencode({'raw': raw_response})

    # NOTE(review): presumably the server listening on localhost:8998 sends
    # the "raw" query argument back as its reply -- confirm against the
    # test server implementation.
    spider = SimpleSpider("http://localhost:8998/raw?{0}".format(query))
    yield docrawl(spider)

    # The response must have been received exactly once.
    log = get_testlog()
    self.assertEqual(log.count("Got response 200"), 1)
def test_unbounded_response(self):
    # Completeness of responses without Content-Length or Transfer-Encoding
    # can not be determined, we treat them as valid but flagged as "partial".
    try:
        # Python 3 keeps urlencode in urllib.parse.
        from urllib.parse import urlencode
    except ImportError:
        # Python 2 location, which is what this test originally imported.
        from urllib import urlencode

    # The "raw" value is a complete HTTP reply (status line, headers, blank
    # line, body) that has no Content-Length and no Transfer-Encoding header.
    query = urlencode({'raw': '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''})

    # NOTE(review): presumably the /raw endpoint on localhost:8998 replays
    # the encoded payload verbatim -- confirm against the test server.
    url = "http://localhost:8998/raw?{0}".format(query)
    spider = SimpleSpider(url)
    yield docrawl(spider)

    # Exactly one "Got response 200" entry is expected in the captured log.
    log = get_testlog()
    self.assertEqual(log.count("Got response 200"), 1)
def test_logging(self):
    # Crawl with status 402 whitelisted via bypass_status_codes, then check
    # both the spider's bookkeeping sets and the "Ignoring response" entries
    # found in the captured log.
    error_spider = _HttpErrorSpider(bypass_status_codes={402})
    yield docrawl(error_spider)

    # Status codes recorded by the spider itself.
    self.assertEqual(error_spider.parsed, {'200', '402'})
    self.assertEqual(error_spider.skipped, {'402'})
    self.assertEqual(error_spider.failed, {'404', '500'})

    captured = get_testlog()
    # Responses that were dropped must be reported as ignored ...
    for expected_entry in ('Ignoring response <404',
                           'Ignoring response <500'):
        self.assertIn(expected_entry, captured)
    # ... while the parsed ones must not be.
    for unexpected_entry in ('Ignoring response <200',
                             'Ignoring response <402'):
        self.assertNotIn(unexpected_entry, captured)
def test_logging(self):
    # Run a crawl of _HttpErrorSpider with status 402 passed through
    # bypass_status_codes, then verify the spider's bookkeeping sets and the
    # "Ignoring response" entries found in the captured log.
    crawler = get_crawler(_HttpErrorSpider)
    yield crawler.crawl(bypass_status_codes={402})

    # Read the spider off the crawler only after the crawl has finished.
    finished_spider = crawler.spider
    self.assertEqual(finished_spider.parsed, {'200', '402'})
    self.assertEqual(finished_spider.skipped, {'402'})
    self.assertEqual(finished_spider.failed, {'404', '500'})

    captured = get_testlog()
    # Responses that were dropped must be reported as ignored ...
    for expected_entry in ('Ignoring response <404',
                           'Ignoring response <500'):
        self.assertIn(expected_entry, captured)
    # ... while the parsed ones must not be.
    for unexpected_entry in ('Ignoring response <200',
                             'Ignoring response <402'):
        self.assertNotIn(unexpected_entry, captured)
def _assert_retried(self):
    """Assert the captured log shows two retries followed by one give-up.

    The log returned by ``get_testlog()`` must contain "Retrying" exactly
    twice and "Gave up retrying" exactly once.
    """
    captured = get_testlog()
    # (phrase, expected number of occurrences), checked in this order.
    expectations = (
        ("Retrying", 2),
        ("Gave up retrying", 1),
    )
    for phrase, expected_count in expectations:
        self.assertEqual(captured.count(phrase), expected_count)
def _assert_got_tunnel_error(self):
    """Assert that 'TunnelError' occurs exactly once in the captured log."""
    occurrences = get_testlog().count('TunnelError')
    self.assertEqual(occurrences, 1)
def _assert_got_response_code(self, code):
    """Assert the captured log holds exactly one 'Crawled (<code>)' entry.

    ``code`` is interpolated with ``%d``, so it has to be a number.
    """
    marker = 'Crawled (%d)' % code
    occurrences = get_testlog().count(marker)
    self.assertEqual(occurrences, 1)