def test_save_image_location():
    """Check ``Comic.save_image_location`` against three representative URLs.

    The cases are a plain image URL, an empty URL, and an image URL that
    carries a query string. Each row is ``(url, page, directory, expected)``.
    """
    cases = (
        ("http://imgs.xkcd.com/comics/barrel_cropped_(1).jpg", 1, "foo",
         "foo/1.jpg"),
        ("", 1, "bar", "bar/1"),
        ("http://imgs.xkcd.com/comics/barrel_cropped_(1).jpg?q=123", 1, "foo",
         "foo/1.jpg"),
    )
    for url, page, directory, expected in cases:
        assert Comic.save_image_location(url, page, directory) == expected
def image_in_zipfile(item, directory):
    """Tell whether the item's image is already a member of ``<directory>.cbz``.

    Args:
        item: Mapping-like object providing ``"url"`` and ``"page"`` via
            ``.get``.
        directory: Path prefix; the archive looked up is ``directory`` with
            ``.cbz`` appended.

    Returns:
        ``False`` when the archive file does not exist; otherwise whether the
        name produced by ``Comic.save_image_location(url, page)`` appears in
        the archive's member list.
    """
    archive_path = "{}.cbz".format(directory)
    if os.path.isfile(archive_path):
        # The member name is computed from url and page only; no directory
        # argument is passed here.
        member_name = Comic.save_image_location(item.get("url"),
                                                item.get("page"))
        with ZipFile(archive_path, "r") as archive:
            return member_name in archive.namelist()
    return False
def get_media_requests(self, item, info):
    """Yield the download request for an item's image unless it already exists.

    This is a generator, so nothing below runs until it is iterated.

    Args:
        item: Mapping with the keys ``"url"``, ``"page"``, ``"title"`` and
            ``"alt_text"``.
        info: Object whose ``spider.directory`` names the output directory.

    Yields:
        A single ``scrapy.Request`` for the image URL, with the target file
        name stored under ``meta["image_file_name"]``.

    Raises:
        KeyError: If one of the four item keys is missing (from
            ``itemgetter``).
        DropItem: If the image file is already on disk or
            ``image_in_zipfile`` reports it inside the archive.
    """
    # Uses .get() here so the progress line is printed even if the key
    # lookup below fails.
    click.echo("Saving image {}".format(item.get("url")))

    fields = itemgetter("url", "page", "title", "alt_text")
    url, page, title, alt_text = fields(item)
    target_dir = info.spider.directory

    image_path = Comic.save_image_location(url, page, target_dir, title)
    already_on_disk = os.path.isfile(image_path)
    # Short-circuit: the archive is only inspected when the file is not on
    # disk.
    if already_on_disk or self.image_in_zipfile(item, target_dir):
        skip_message = "The image was already downloaded. Skipping..."
        click.echo(skip_message)
        raise DropItem(skip_message)

    if alt_text is not None:
        alt_text_path = Comic.save_alt_text_location(page, target_dir)
        with open(alt_text_path, "w") as handle:
            handle.write(alt_text)

    file_name = Comic.save_image_filename(url, page, title, target_dir)
    yield scrapy.Request(url, meta={"image_file_name": file_name})