def test_sitemap_detect(self):
    """Exercise SitemapReader's detection by file content, URL and request.

    The file checks feed in-memory byte streams to ``is_file``; the URL and
    request checks pass parsed ``URLInfo`` / ``Request`` objects to
    ``is_url`` and ``is_request`` respectively.
    """
    # UTF-16LE encoded XML prolog followed by <urlset> is detected.
    self.assertTrue(SitemapReader.is_file(
        io.BytesIO('<?xml > <urlset >'.encode('utf-16le'))
    ))
    # A UTF-16LE encoded HTML document is not a sitemap.
    self.assertFalse(SitemapReader.is_file(
        io.BytesIO('<!DOCTYPE html><html><body>'.encode('utf-16le'))
    ))
    # A <urlset> tag appearing inside an HTML document must not match.
    self.assertFalse(SitemapReader.is_file(
        io.BytesIO(b'<html><body>hello<urlset>')
    ))
    # Plain single-byte encoded XML prolog followed by <urlset> is detected.
    self.assertTrue(SitemapReader.is_file(
        io.BytesIO(b'<?xml version> <urlset>')
    ))

    # Gzip-compressed sitemap content is expected to be detected as well.
    data_file = io.BytesIO()
    # The context manager guarantees the gzip stream is flushed and closed
    # even if the write fails; it does not close the underlying BytesIO.
    with gzip.GzipFile(fileobj=data_file, mode='wb') as g_file:
        g_file.write('<?xml version> <urlset>'.encode('utf-16le'))
    # Rewind so the reader sees the compressed data from the start.
    data_file.seek(0)
    self.assertTrue(SitemapReader.is_file(
        data_file
    ))

    # URL-based detection: sitemap XML and robots.txt paths are accepted
    # (presumably because robots.txt can point to sitemaps -- confirm against
    # SitemapReader), while an image path is rejected.
    self.assertTrue(
        SitemapReader.is_url(URLInfo.parse('example.com/sitemaps1.xml'))
    )
    self.assertTrue(
        SitemapReader.is_url(URLInfo.parse('example.com/robots.txt'))
    )
    self.assertFalse(
        SitemapReader.is_url(URLInfo.parse('example.com/image.jpg'))
    )

    # Request-based detection: a sitemap XML request is accepted and an
    # image request is rejected.
    self.assertTrue(
        SitemapReader.is_request(Request.new('example.com/sitemaps34.xml'))
    )
    self.assertFalse(
        SitemapReader.is_request(Request.new('example.com/image.jpg'))
    )