Exemplo n.º 1
0
    def grab(self, url):
        """Parse the page at ``url`` and package its content elements.

        Returns a 3-tuple ``(page_info, content_stream, images)``:

        * ``page_info`` -- first value returned by ``_get_content_elements``
          (a ``PageInfo`` according to the original annotation).
        * ``content_stream`` -- a ``StringIO`` over the concatenation of
          ``element_tostring`` applied to every content element.
        * ``images`` -- whatever ``download_and_update_images`` returns
          (``{path1: image, ...}`` according to the original annotation).
        """
        document = html.parse(url)
        page_info, content_nodes = self._get_content_elements(document)

        # Runs before serialization; presumably it rewrites image references
        # inside the elements -- TODO confirm against the helper.
        images = download_and_update_images(
            content_nodes, get_urlbase(url), self.EPUB_IMG_DIR)

        # NOTE(review): this value is never used below; the call is kept
        # unchanged in case `_get_encoding` raises or has side effects.
        encoding = self._get_encoding(document)

        serialized = [element_tostring(node) for node in content_nodes]
        return page_info, StringIO(''.join(serialized)), images
Exemplo n.º 2
0
 def grab(self, url):
     """Extract the content elements for ``url`` and package them.

     Returns a 3-tuple ``(page_info, content_stream, images)``:
     ``page_info`` is the first value returned by
     ``_get_content_elements``, ``content_stream`` is a ``StringIO`` over
     the concatenation of ``element_tostring`` applied to each element, and
     ``images`` is whatever ``download_and_update_images`` returns.
     """
     # The third value (the encoding) is unpacked but not needed here.
     page_info, content_nodes, _encoding = self._get_content_elements(url)

     # Runs before serialization; presumably it rewrites image references
     # inside the elements -- TODO confirm against the helper.
     images = download_and_update_images(
         content_nodes, get_urlbase(url), self.EPUB_IMG_DIR)

     serialized = [element_tostring(node) for node in content_nodes]
     return page_info, StringIO(''.join(serialized)), images