Exemplo n.º 1
0
def get_sitemap_body(response):
    '''Extract the sitemap payload from ``response``.

    The checks run in order and the first match wins:

    * an ``XmlResponse`` instance -> its body, unchanged;
    * a response for which ``is_gzipped`` is true -> the gunzipped body;
    * a URL ending in ``.xml`` -> the body, unchanged;
    * a URL ending in ``.xml.gz`` -> the gunzipped body.

    Returns None when none of the checks match, i.e. the response is not
    treated as a sitemap.
    '''
    if isinstance(response, XmlResponse):
        return response.body
    if is_gzipped(response):
        return gunzip(response.body)

    # Fall back to the URL suffix when the response type gave no answer.
    url = response.url
    if url.endswith('.xml'):
        return response.body
    if url.endswith('.xml.gz'):
        return gunzip(response.body)
    return None
Exemplo n.º 2
0
 def process_response(self, response):
     '''Decode the body of ``response`` using its last ``Content-Encoding``.

     The response is returned untouched when it has no ``Content-Encoding``
     header value or when ``is_gzipped(response)`` is true.  Otherwise the
     last listed encoding is popped, the body is decoded with
     ``self._decode`` and a replacement response is returned whose class is
     chosen by ``factory.from_args`` from the headers and URL.
     '''
     pending = response.headers.getlist('Content-Encoding')
     if not pending or is_gzipped(response):
         return response

     # NOTE(review): presumably 0 is the default value and response.request
     # allows a per-request override -- confirm against the settings API.
     size_limit = self.settings.get_int('DOWNLOAD_SIZE_LIMIT', 0,
                                        response.request)
     last_encoding = pending.pop()
     # Remove the header once its final value has been consumed; this
     # happens before the headers are used to pick the new response class.
     if not pending:
         del response.headers['Content-Encoding']
     plain_body = self._decode(response.body, last_encoding.lower(),
                               size_limit)
     target_cls = factory.from_args(headers=response.headers,
                                    url=response.url)
     return response.replace(cls=target_cls, body=plain_body)
Exemplo n.º 3
0
 def process_response(self, response):
     '''Decode the body of ``response`` using its last ``Content-Encoding``.

     The response is returned untouched when it has no ``Content-Encoding``
     header value or when ``is_gzipped(response)`` is true.  Otherwise the
     last listed encoding is popped, the body is decoded with
     ``self._decode`` and a replacement response is returned whose class is
     chosen by ``factory.from_args`` from the headers and URL.
     '''
     pending = response.headers.getlist('Content-Encoding')
     if not pending or is_gzipped(response):
         return response

     # NOTE(review): presumably 0 is the default value and response.request
     # allows a per-request override -- confirm against the settings API.
     size_limit = self.settings.get_int('DOWNLOAD_SIZE_LIMIT', 0,
                                        response.request)
     last_encoding = pending.pop()
     # Remove the header once its final value has been consumed; this
     # happens before the headers are used to pick the new response class.
     if not pending:
         del response.headers['Content-Encoding']
     plain_body = self._decode(response.body, last_encoding.lower(),
                               size_limit)
     target_cls = factory.from_args(headers=response.headers,
                                    url=response.url)
     return response.replace(cls=target_cls, body=plain_body)