def my_fetcher(url):
    """Fetch *url* through ``subrequest`` and describe the response.

    The unquoted URL is requested via ``subrequest``. A single 301
    redirect is followed using the ``location`` response header; any
    final status other than 200 is treated as a failure.

    The MIME type and encoding come from the ``content-type`` response
    header when it is present (encoding defaults to ``'utf-8'`` if the
    header carries no ``charset`` parameter). Without that header, both
    values are taken from ``guess_content_type(uri)``.

    Returns:
        dict with the keys ``string`` (the response body), ``mime_type``
        and ``encoding``.

    Raises:
        Exception: if the final response status is not 200.
    """
    uri = url
    # Fetch the data
    response = subrequest(unquote(uri))

    # Handle redirects (one hop only, permanent redirects only)
    if response.status == 301:
        uri = response.getHeader('location')
        response = subrequest(unquote(uri))

    if response.status != 200:
        raise Exception("URI not found")

    content_type = response.getHeader('content-type')
    # Default encoding
    encoding = 'utf-8'

    if content_type:
        # A header may carry several ';'-separated parameters, e.g.
        # "text/html; charset=utf-8; boundary=x". Unpacking split(';')
        # into two names would raise ValueError there, and a lone
        # non-charset parameter must not be mistaken for the encoding.
        ctype, _, params = content_type.partition(';')
        ctype = ctype.strip()
        for param in params.split(';'):
            name, _, value = param.partition('=')
            if name.strip().lower() == 'charset':
                charset = value.strip().strip('"\'')
                if charset:
                    encoding = charset
                break
    else:
        # Guess the content_type from the URI if needed.
        # NOTE(review): the guessed encoding replaces the 'utf-8' default
        # even when the guess is empty -- confirm callers accept that.
        ctype, encoding = guess_content_type(uri)
        if ctype and ctype.startswith('text/'):
            # NOTE(review): text_type() receives the URI string rather
            # than the response body -- confirm this is intended.
            ctype = text_type(uri)

    data = response.getBody()

    # I don't think we need to encode ctype == 'text/css' to ascii anymore
    return dict(string=data, mime_type=ctype, encoding=encoding)
def test_text_type(self):
    # Table-driven check of text_type() on str input: each sample is
    # paired with the MIME type that text_type() must report for it.
    html_doc = "<HtmL><body>hello world</body></html>"
    from zope.contenttype import text_type

    doctype_decl = (
        "<!DOCTYPE HTML PUBLIC "
        '"-//W3C//DTD HTML 4.01 Transitional//EN" '
        '"http://www.w3.org/TR/html4/loose.dtd">'
    )
    leading_blanks = " " * 14

    expectations = [
        (html_doc, "text/html"),
        ('<?xml version="1.0"><foo/>', "text/xml"),
        # An upper-case "<?XML" prolog is expected to count as plain text.
        ('<?XML version="1.0"><foo/>', "text/plain"),
        ("foo bar", "text/plain"),
        (doctype_decl, "text/html"),
        # See https://bugs.launchpad.net/bugs/487998
        (leading_blanks + html_doc, "text/html"),
        (leading_blanks + "abc", "text/plain"),
        (leading_blanks, "text/plain"),
    ]
    for sample, expected_type in expectations:
        self.assertEqual(text_type(sample), expected_type)
def test_text_type(self):
    # Table-driven check of text_type() on bytes input: each sample is
    # paired with the MIME type that text_type() must report for it.
    html_doc = b'<HtmL><body>hello world</body></html>'
    from zope.contenttype import text_type

    doctype_decl = (
        b'<!DOCTYPE HTML PUBLIC '
        b'"-//W3C//DTD HTML 4.01 Transitional//EN" '
        b'"http://www.w3.org/TR/html4/loose.dtd">'
    )
    long_paragraph = b'<p>' + b'abc ' * 100 + b'</p>'
    leading_blanks = b' ' * 14

    expectations = [
        (html_doc, 'text/html'),
        (b'<?xml version="1.0"><foo/>', 'text/xml'),
        (b'<?XML version="1.0"><foo/>', 'text/xml'),
        (b'foo bar', 'text/plain'),
        (doctype_decl, 'text/html'),
        (b'\n\n<!DOCTYPE html>\n', 'text/html'),
        # we can also parse text snippets
        (b'<p>Hello</p>', 'text/html'),
        (long_paragraph, 'text/html'),
        # See https://bugs.launchpad.net/bugs/487998
        (leading_blanks + html_doc, 'text/html'),
        (leading_blanks + b'abc', 'text/plain'),
        (leading_blanks, 'text/plain'),
    ]
    for sample, expected_type in expectations:
        self.assertEqual(text_type(sample), expected_type)
def test_text_type(self):
    # Verify the MIME type text_type() reports for a series of bytes
    # payloads: HTML, XML prologs, doctypes, snippets and padded input.
    markup = b'<HtmL><body>hello world</body></html>'
    from zope.contenttype import text_type

    def check(payload, mime):
        # Single assertion shared by every case below.
        self.assertEqual(text_type(payload), mime)

    check(markup, 'text/html')
    check(b'<?xml version="1.0"><foo/>', 'text/xml')
    check(b'<?XML version="1.0"><foo/>', 'text/xml')
    check(b'foo bar', 'text/plain')

    doctype_parts = [
        b'<!DOCTYPE HTML PUBLIC ',
        b'"-//W3C//DTD HTML 4.01 Transitional//EN" ',
        b'"http://www.w3.org/TR/html4/loose.dtd">',
    ]
    check(b''.join(doctype_parts), 'text/html')
    check(b'\n\n<!DOCTYPE html>\n', 'text/html')

    # we can also parse text snippets
    check(b'<p>Hello</p>', 'text/html')
    filler = b'abc ' * 100
    check(b'<p>' + filler + b'</p>', 'text/html')

    # See https://bugs.launchpad.net/bugs/487998
    indent = b' ' * 14
    check(indent + markup, 'text/html')
    check(indent + b'abc', 'text/plain')
    check(indent, 'text/plain')