def _get_encoding(self, infer=False):
    """Pick the encoding name for this object and return it resolved.

    Candidates are tried in this order:

    1. the value of ``self._declared_encoding()``, kept only when it is
       truthy and ``encoding_exists`` accepts it;
    2. the value of ``self._body_inferred_encoding()``, consulted only
       when ``infer`` is true and step 1 produced nothing usable;
    3. ``self._DEFAULT_ENCODING`` when neither step produced a truthy
       value.

    The chosen name is passed through ``resolve_encoding`` before being
    returned.
    """
    declared = self._declared_encoding()
    if declared and encoding_exists(declared):
        chosen = declared
    elif infer:
        # Nothing usable was declared; fall back to inspecting the body.
        chosen = self._body_inferred_encoding()
    else:
        chosen = None
    return resolve_encoding(chosen or self._DEFAULT_ENCODING)
def test_process_response_force_recalculate_encoding(self):
    """Check the encoding reported for a gzip-compressed ``HtmlResponse``.

    The response headers carry no charset; the only charset declaration
    (gb2312) is the ``<meta>`` tag inside the compressed body. After
    ``self.mw.process_response`` the result must be an ``HtmlResponse``
    whose body equals the uncompressed text and whose encoding equals
    ``resolve_encoding("gb2312")``.
    """
    headers = {"Content-Type": "text/html", "Content-Encoding": "gzip"}
    f = StringIO()
    plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
    # Gzip the body into the in-memory buffer; close() flushes the trailer.
    zf = GzipFile(fileobj=f, mode="wb")
    zf.write(plainbody)
    zf.close()
    # URL scheme typo ("http;//") corrected to a well-formed "http://".
    response = HtmlResponse(
        "http://www.example.com/page.html", headers=headers, body=f.getvalue()
    )
    request = Request("http://www.example.com/")

    newresponse = self.mw.process_response(request, response, self.spider)
    # Use a TestCase assertion rather than a bare ``assert`` so the check
    # is not stripped when Python runs with -O.
    self.assertTrue(isinstance(newresponse, HtmlResponse))
    self.assertEqual(newresponse.body, plainbody)
    self.assertEqual(newresponse.encoding, resolve_encoding("gb2312"))
def test_process_response_encoding_inside_body(self):
    """Check the encoding reported when the charset is only in the body.

    A plain ``Response`` is built with gzip-compressed HTML whose only
    charset declaration (gb2312) is a ``<meta>`` tag in the body. After
    ``self.mw.process_response`` the result must be an ``HtmlResponse``
    whose body equals the uncompressed text and whose encoding equals
    ``resolve_encoding('gb2312')``.
    """
    headers = {
        'Content-Type': 'text/html',
        'Content-Encoding': 'gzip',
    }
    f = StringIO()
    plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
    # Gzip the body into the in-memory buffer; close() flushes the trailer.
    zf = GzipFile(fileobj=f, mode='wb')
    zf.write(plainbody)
    zf.close()
    # URL scheme typo ("http;//") corrected to a well-formed "http://".
    response = Response("http://www.example.com/", headers=headers, body=f.getvalue())
    request = Request("http://www.example.com/")

    newresponse = self.mw.process_response(request, response, self.spider)
    # Use a TestCase assertion rather than a bare ``assert`` so the check
    # is not stripped when Python runs with -O.
    self.assertTrue(isinstance(newresponse, HtmlResponse))
    self.assertEqual(newresponse.body, plainbody)
    self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))
def test_process_response_force_recalculate_encoding(self):
    """Check the encoding reported for a gzip-compressed ``HtmlResponse``.

    The response headers carry no charset; the only charset declaration
    (gb2312) is the ``<meta>`` tag inside the compressed body. After
    ``self.mw.process_response`` the result must be an ``HtmlResponse``
    whose body equals the uncompressed text and whose encoding equals
    ``resolve_encoding('gb2312')``.
    """
    headers = {
        'Content-Type': 'text/html',
        'Content-Encoding': 'gzip',
    }
    f = StringIO()
    plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
    # Gzip the body into the in-memory buffer; close() flushes the trailer.
    zf = GzipFile(fileobj=f, mode='wb')
    zf.write(plainbody)
    zf.close()
    # URL scheme typo ("http;//") corrected to a well-formed "http://".
    response = HtmlResponse(
        "http://www.example.com/page.html", headers=headers, body=f.getvalue()
    )
    request = Request("http://www.example.com/")

    newresponse = self.mw.process_response(request, response, self.spider)
    # Use a TestCase assertion rather than a bare ``assert`` so the check
    # is not stripped when Python runs with -O.
    self.assertTrue(isinstance(newresponse, HtmlResponse))
    self.assertEqual(newresponse.body, plainbody)
    self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))
def _assert_response_encoding(self, response, encoding):
    """Assert that ``response.encoding`` equals the resolved ``encoding``.

    ``encoding`` is passed through ``resolve_encoding`` before the
    comparison, so callers may supply any name that helper accepts.
    """
    actual = response.encoding
    expected = resolve_encoding(encoding)
    self.assertEqual(actual, expected)
def test_resolve_encoding(self):
    """Check ``resolve_encoding`` against ``self._ENCODING_ALIASES``.

    With that alias table, 'latin1' must come back unchanged and 'foo'
    must come back as 'cp1252'.
    """
    unchanged = resolve_encoding('latin1', self._ENCODING_ALIASES)
    self.assertEqual(unchanged, 'latin1')

    # presumably 'foo' is an entry of the alias table — verify against
    # the class attribute, which is outside this block.
    translated = resolve_encoding('foo', self._ENCODING_ALIASES)
    self.assertEqual(translated, 'cp1252')