class HttpCompressionTest(TestCase):
    """Tests for ``HttpCompressionMiddleware`` request and response handling."""

    def setUp(self):
        """Set ``self.spider`` and ``self.mw`` to a new spider and middleware."""
        self.spider = Spider('foo')
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):
        """Build a ``Response`` whose body is the sample file for *coding*.

        ``FORMAT[coding]`` supplies the sample file name (read from
        ``SAMPLEDIR``) and the ``Content-Encoding`` header value. The
        returned response has a ``request`` attribute whose
        ``Accept-Encoding`` header is ``gzip,deflate``.

        Raises ``ValueError`` if *coding* is not a key of ``FORMAT``.
        """
        if coding not in FORMAT:
            raise ValueError('Unknown sample coding: %r' % (coding,))

        samplefile, contentencoding = FORMAT[coding]
        with open(join(SAMPLEDIR, samplefile), 'rb') as sample:
            body = sample.read()

        headers = {
            'Server': 'Yaws/1.49 Yet Another Web Server',
            'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
            'Content-Length': len(body),
            'Content-Type': 'text/html',
            'Content-Encoding': contentencoding,
        }

        response = Response('http://scrapytest.org/', body=body, headers=headers)
        response.request = Request('http://scrapytest.org',
                                   headers={'Accept-Encoding': 'gzip,deflate'})
        return response

    def test_process_request(self):
        """``process_request`` sets ``Accept-Encoding`` on a request lacking it."""
        request = Request('http://scrapytest.org')
        self.assertNotIn('Accept-Encoding', request.headers)
        self.mw.process_request(request, self.spider)
        self.assertEqual(request.headers.get('Accept-Encoding'), 'gzip,deflate')

    def test_process_response_gzip(self):
        """The gzip sample yields a new response without ``Content-Encoding``."""
        response = self._getresponse('gzip')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'gzip')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_rawdeflate(self):
        """The 'rawdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('rawdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    # NOTE: the method name keeps its original spelling ("zlibdelate") so
    # existing test IDs stay stable.
    def test_process_response_zlibdelate(self):
        """The 'zlibdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('zlibdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_plain(self):
        """A response without ``Content-Encoding`` is returned as the same object."""
        response = Response('http://scrapytest.org', body='<!DOCTYPE...')
        request = Request('http://scrapytest.org')

        self.assertFalse(response.headers.get('Content-Encoding'))
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIs(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))

    def test_multipleencodings(self):
        """With ``['uuencode', 'gzip']`` only ``uuencode`` is left on the result."""
        response = self._getresponse('gzip')
        response.headers['Content-Encoding'] = ['uuencode', 'gzip']
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertEqual(newresponse.headers.getlist('Content-Encoding'), ['uuencode'])

    def test_process_response_encoding_inside_body(self):
        """A gzipped ``Response`` becomes an ``HtmlResponse`` using the body's charset.

        The charset (gb2312) is declared only in a ``<meta>`` tag of the
        compressed body, not in the headers.
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        f = BytesIO()
        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)

        response = Response("http://www.example.com/", headers=headers, body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))

    def test_process_response_force_recalculate_encoding(self):
        """Same as the previous test, but the input is already an ``HtmlResponse``.

        The resulting encoding must still equal the charset declared in the
        decompressed body (gb2312).
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        f = BytesIO()
        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)

        response = HtmlResponse("http://www.example.com/page.html", headers=headers,
                                body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))

    def test_process_response_gzipped_contenttype(self):
        """A gzip sample relabelled ``application/gzip`` is returned unchanged."""
        response = self._getresponse('gzip')
        response.headers['Content-Type'] = 'application/gzip'
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIs(newresponse, response)
        self.assertEqual(response.headers['Content-Encoding'], 'gzip')
        self.assertEqual(response.headers['Content-Type'], 'application/gzip')
def setUp(self):
    """Set ``self.spider`` to a spider named 'foo' and ``self.mw`` to a new middleware."""
    spider = Spider('foo')
    middleware = HttpCompressionMiddleware()
    self.spider = spider
    self.mw = middleware
class HttpCompressionTest(TestCase):
    """Tests for ``HttpCompressionMiddleware`` request and response handling."""

    def setUp(self):
        """Set ``self.spider`` and ``self.mw`` to a new spider and middleware."""
        self.spider = BaseSpider('foo')
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):
        """Build a ``Response`` whose body is the sample file for *coding*.

        ``FORMAT[coding]`` supplies the sample file name (read from
        ``SAMPLEDIR``) and the ``Content-Encoding`` header value. The
        returned response has a ``request`` attribute whose
        ``Accept-Encoding`` header is ``gzip,deflate``.

        Raises ``ValueError`` if *coding* is not a key of ``FORMAT``.
        """
        if coding not in FORMAT:
            raise ValueError('Unknown sample coding: %r' % (coding,))

        samplefile, contentencoding = FORMAT[coding]
        with open(join(SAMPLEDIR, samplefile), 'rb') as sample:
            body = sample.read()

        headers = {
            'Server': 'Yaws/1.49 Yet Another Web Server',
            'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
            'Content-Length': len(body),
            'Content-Type': 'text/html',
            'Content-Encoding': contentencoding,
        }

        response = Response('http://scrapytest.org/', body=body, headers=headers)
        response.request = Request('http://scrapytest.org',
                                   headers={'Accept-Encoding': 'gzip,deflate'})
        return response

    def test_process_request(self):
        """``process_request`` sets ``Accept-Encoding`` on a request lacking it."""
        request = Request('http://scrapytest.org')
        self.assertNotIn('Accept-Encoding', request.headers)
        self.mw.process_request(request, self.spider)
        self.assertEqual(request.headers.get('Accept-Encoding'), 'x-gzip,gzip,deflate')

    def test_process_response_gzip(self):
        """The gzip sample yields a new response without ``Content-Encoding``."""
        response = self._getresponse('gzip')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'gzip')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_rawdeflate(self):
        """The 'rawdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('rawdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    # NOTE: the method name keeps its original spelling ("zlibdelate") so
    # existing test IDs stay stable.
    def test_process_response_zlibdelate(self):
        """The 'zlibdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('zlibdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_plain(self):
        """A response without ``Content-Encoding`` is returned as the same object."""
        response = Response('http://scrapytest.org', body='<!DOCTYPE...')
        request = Request('http://scrapytest.org')

        self.assertFalse(response.headers.get('Content-Encoding'))
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIs(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))

    def test_multipleencodings(self):
        """With ``['uuencode', 'gzip']`` only ``uuencode`` is left on the result."""
        response = self._getresponse('gzip')
        response.headers['Content-Encoding'] = ['uuencode', 'gzip']
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertEqual(newresponse.headers.getlist('Content-Encoding'), ['uuencode'])

    def test_process_response_encoding_inside_body(self):
        """A gzipped ``Response`` becomes an ``HtmlResponse`` using the body's charset.

        The charset (gb2312) is declared only in a ``<meta>`` tag of the
        compressed body, not in the headers.
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        f = StringIO()
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)

        response = Response("http://www.example.com/", headers=headers, body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))

    def test_process_response_force_recalculate_encoding(self):
        """Same as the previous test, but the input is already an ``HtmlResponse``.

        The resulting encoding must still equal the charset declared in the
        decompressed body (gb2312).
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        f = StringIO()
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)

        response = HtmlResponse("http://www.example.com/page.html", headers=headers,
                                body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))
def setUp(self):
    """Set ``self.spider`` to a spider named 'foo' and ``self.mw`` to a new middleware."""
    spider = BaseSpider('foo')
    middleware = HttpCompressionMiddleware()
    self.spider = spider
    self.mw = middleware
def process_response(self, request, response, spider):
    """Chain two response processors and return the final result.

    The response is first handed to ``_chuncked.process_response``; whatever
    that returns is then passed to
    ``HttpCompressionMiddleware.process_response`` with this instance as
    ``self``.
    """
    # presumably `_chuncked` undoes chunked transfer encoding before
    # decompression -- it is defined outside this block; verify there.
    intermediate = _chuncked.process_response(request, response, spider)
    return HttpCompressionMiddleware.process_response(
        self, request, intermediate, spider)
class HttpCompressionTest(TestCase):
    """Tests for ``HttpCompressionMiddleware`` request and response handling."""

    def setUp(self):
        """Set ``self.spider`` and ``self.mw`` to a new spider and middleware."""
        self.spider = BaseSpider('foo')
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):
        """Build a ``Response`` whose body is the sample file for *coding*.

        ``FORMAT[coding]`` supplies the sample file name (read from
        ``SAMPLEDIR``) and the ``Content-Encoding`` header value. The
        returned response has a ``request`` attribute whose
        ``Accept-Encoding`` header is ``gzip,deflate``.

        Raises ``ValueError`` if *coding* is not a key of ``FORMAT``.
        """
        if coding not in FORMAT:
            raise ValueError('Unknown sample coding: %r' % (coding,))

        samplefile, contentencoding = FORMAT[coding]
        with open(join(SAMPLEDIR, samplefile), 'rb') as sample:
            body = sample.read()

        headers = {
            'Server': 'Yaws/1.49 Yet Another Web Server',
            'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
            'Content-Length': len(body),
            'Content-Type': 'text/html',
            'Content-Encoding': contentencoding,
        }

        response = Response('http://scrapytest.org/', body=body, headers=headers)
        response.request = Request('http://scrapytest.org',
                                   headers={'Accept-Encoding': 'gzip,deflate'})
        return response

    def test_process_request(self):
        """``process_request`` sets ``Accept-Encoding`` on a request lacking it."""
        request = Request('http://scrapytest.org')
        self.assertNotIn('Accept-Encoding', request.headers)
        self.mw.process_request(request, self.spider)
        self.assertEqual(request.headers.get('Accept-Encoding'), 'gzip,deflate')

    def test_process_response_gzip(self):
        """The gzip sample yields a new response without ``Content-Encoding``."""
        response = self._getresponse('gzip')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'gzip')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_rawdeflate(self):
        """The 'rawdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('rawdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    # NOTE: the method name keeps its original spelling ("zlibdelate") so
    # existing test IDs stay stable.
    def test_process_response_zlibdelate(self):
        """The 'zlibdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse('zlibdeflate')
        request = response.request

        self.assertEqual(response.headers['Content-Encoding'], 'deflate')
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertNotIn('Content-Encoding', newresponse.headers)

    def test_process_response_plain(self):
        """A response without ``Content-Encoding`` is returned as the same object."""
        response = Response('http://scrapytest.org', body='<!DOCTYPE...')
        request = Request('http://scrapytest.org')

        self.assertFalse(response.headers.get('Content-Encoding'))
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIs(newresponse, response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))

    def test_multipleencodings(self):
        """With ``['uuencode', 'gzip']`` only ``uuencode`` is left on the result."""
        response = self._getresponse('gzip')
        response.headers['Content-Encoding'] = ['uuencode', 'gzip']
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertEqual(newresponse.headers.getlist('Content-Encoding'), ['uuencode'])
def setUp(self):
    """Set ``self.spider`` to a spider named "foo" and ``self.mw`` to a new middleware."""
    spider = BaseSpider("foo")
    middleware = HttpCompressionMiddleware()
    self.spider = spider
    self.mw = middleware
class HttpCompressionTest(TestCase):
    """Tests for ``HttpCompressionMiddleware`` request and response handling."""

    def setUp(self):
        """Set ``self.spider`` and ``self.mw`` to a new spider and middleware."""
        self.spider = BaseSpider("foo")
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):
        """Build a ``Response`` whose body is the sample file for *coding*.

        ``FORMAT[coding]`` supplies the sample file name (read from
        ``SAMPLEDIR``) and the ``Content-Encoding`` header value. The
        returned response has a ``request`` attribute whose
        ``Accept-Encoding`` header is ``gzip,deflate``.

        Raises ``ValueError`` if *coding* is not a key of ``FORMAT``.
        """
        if coding not in FORMAT:
            raise ValueError("Unknown sample coding: %r" % (coding,))

        samplefile, contentencoding = FORMAT[coding]
        with open(join(SAMPLEDIR, samplefile), "rb") as sample:
            body = sample.read()

        headers = {
            "Server": "Yaws/1.49 Yet Another Web Server",
            "Date": "Sun, 08 Mar 2009 00:41:03 GMT",
            "Content-Length": len(body),
            "Content-Type": "text/html",
            "Content-Encoding": contentencoding,
        }

        response = Response("http://scrapytest.org/", body=body, headers=headers)
        response.request = Request("http://scrapytest.org",
                                   headers={"Accept-Encoding": "gzip,deflate"})
        return response

    def test_process_request(self):
        """``process_request`` sets ``Accept-Encoding`` on a request lacking it."""
        request = Request("http://scrapytest.org")
        self.assertNotIn("Accept-Encoding", request.headers)
        self.mw.process_request(request, self.spider)
        self.assertEqual(request.headers.get("Accept-Encoding"), "x-gzip,gzip,deflate")

    def test_process_response_gzip(self):
        """The gzip sample yields a new response without ``Content-Encoding``."""
        response = self._getresponse("gzip")
        request = response.request

        self.assertEqual(response.headers["Content-Encoding"], "gzip")
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith("<!DOCTYPE"))
        self.assertNotIn("Content-Encoding", newresponse.headers)

    def test_process_response_rawdeflate(self):
        """The 'rawdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse("rawdeflate")
        request = response.request

        self.assertEqual(response.headers["Content-Encoding"], "deflate")
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith("<!DOCTYPE"))
        self.assertNotIn("Content-Encoding", newresponse.headers)

    # NOTE: the method name keeps its original spelling ("zlibdelate") so
    # existing test IDs stay stable.
    def test_process_response_zlibdelate(self):
        """The 'zlibdeflate' sample (sent as ``deflate``) is replaced likewise."""
        response = self._getresponse("zlibdeflate")
        request = response.request

        self.assertEqual(response.headers["Content-Encoding"], "deflate")
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertTrue(newresponse.body.startswith("<!DOCTYPE"))
        self.assertNotIn("Content-Encoding", newresponse.headers)

    def test_process_response_plain(self):
        """A response without ``Content-Encoding`` is returned as the same object."""
        response = Response("http://scrapytest.org", body="<!DOCTYPE...")
        request = Request("http://scrapytest.org")

        self.assertFalse(response.headers.get("Content-Encoding"))
        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIs(newresponse, response)
        self.assertTrue(newresponse.body.startswith("<!DOCTYPE"))

    def test_multipleencodings(self):
        """With ``["uuencode", "gzip"]`` only ``uuencode`` is left on the result."""
        response = self._getresponse("gzip")
        response.headers["Content-Encoding"] = ["uuencode", "gzip"]
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsNot(newresponse, response)
        self.assertEqual(newresponse.headers.getlist("Content-Encoding"), ["uuencode"])

    def test_process_response_encoding_inside_body(self):
        """A gzipped ``Response`` becomes an ``HtmlResponse`` using the body's charset.

        The charset (gb2312) is declared only in a ``<meta>`` tag of the
        compressed body, not in the headers.
        """
        headers = {"Content-Type": "text/html", "Content-Encoding": "gzip"}
        f = StringIO()
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode="wb") as zf:
            zf.write(plainbody)

        response = Response("http://www.example.com/", headers=headers, body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding("gb2312"))

    def test_process_response_force_recalculate_encoding(self):
        """Same as the previous test, but the input is already an ``HtmlResponse``.

        The resulting encoding must still equal the charset declared in the
        decompressed body (gb2312).
        """
        headers = {"Content-Type": "text/html", "Content-Encoding": "gzip"}
        f = StringIO()
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        # The context manager closes the gzip stream even if write() fails.
        with GzipFile(fileobj=f, mode="wb") as zf:
            zf.write(plainbody)

        response = HtmlResponse("http://www.example.com/page.html", headers=headers,
                                body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding("gb2312"))
def process_response(self, request, response, spider):
    """Chain two response processors and return the final result.

    The response is first handed to ``_chuncked.process_response``; whatever
    that returns is then passed to
    ``HttpCompressionMiddleware.process_response`` with this instance as
    ``self``.
    """
    # presumably `_chuncked` undoes chunked transfer encoding before
    # decompression -- it is defined outside this block; verify there.
    intermediate = _chuncked.process_response(request, response, spider)
    return HttpCompressionMiddleware.process_response(
        self, request, intermediate, spider)