def main():
    """Entry point: fetch the 'trors' (Rise of Red Skull) set page and scrape it.

    Any failure is reported on stdout rather than propagating — this is the
    top-level boundary of the script.
    """
    base = "https://es.marvelcdb.com/set/"
    set_code = "trors"
    try:
        page = zipper.get_html(base + set_code)
        scrapeMain(page, "TheRiseOfRedSkull")
    except Exception as e:
        print(e)
def obtainIssueImages(co_url: str):
    """Return the list of image URLs/tasks for the issue page at *co_url*.

    Fetches the page and extracts the images from its text content.
    NOTE(review): an HTML-node-based fallback (obtainIssueImagesByHtml) used
    to exist here but is currently disabled.
    """
    co_html: BeautifulSoup = zipper.get_html(co_url)
    return obtainIssueImagesByText(co_html)
def scrapeIssue(url: str, dir: str):
    """Download every card image found on *url* into directory *dir*.

    File names are numbered from the module-level card counter
    (``card_idx`` / ``inc_card_idx``), so numbering continues across calls.
    Prints a per-file line and a final summary count.
    """
    page = zipper.get_html(url)
    card_imgs = page.findAll(
        'img', src=True, attrs={'class': 'img-responsive img-vertical-card'})
    processed = 0
    for node in card_imgs:
        file_name = "card-{}.jpg".format(card_idx)
        src = node['src']
        # The site sometimes emits relative links; prefix the host for those.
        if not src.startswith('http'):
            src = "https://es.marvelcdb.com" + src
        print("    " + file_name + ": " + src)
        zipper.get_image(src, dir + "/" + file_name)
        processed += 1
        inc_card_idx()
    print(url + " - Imagenes procesadas = {}".format(processed))
def main():
    """Scrape all issues of 'Conan the Barbarian (1970)' and pack them as .cbz.

    Failures are printed rather than raised — this is the script boundary.
    Relies on a module-level ``URL`` constant for the site base address.
    """
    # Comic slug exactly as it appears in the site URL.
    slug = 'Conan-the-Barbarian-1970'
    try:
        # Destination directory: normalize characters awkward in file paths.
        out_dir = slug
        for old, new in ((':', '_'), (',', '_'), (' ', '_'), ('(', ''), (')', '')):
            out_dir = out_dir.replace(old, new)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        # Fetch the comic index page and walk every issue on it.
        index_html = zipper.get_html(URL + "/Comic/" + slug)
        scrapeComic(index_html, out_dir)
        # Bundle the downloaded directory into a comic-book archive.
        zipper.zip(out_dir, out_dir + '.cbz')
    except Exception as e:
        print(e)
def main():
    """Scrape 'The Savage Sword Of Conan' from issue 96 onward, then zip it.

    Failures are printed rather than raised — this is the script boundary.
    """
    try:
        base_url = 'https://readcomiconline.to/Comic/'
        series = 'The-Savage-Sword-Of-Conan'
        index_page = zipper.get_html(base_url + series)
        # Destination directory: normalize characters awkward in file paths.
        target = series
        for old, new in ((':', '_'), (',', '_'), (' ', '_'), ('(', ''), (')', '')):
            target = target.replace(old, new)
        if not os.path.exists(target):
            os.makedirs(target)
        # Resume point: issues before this index were already downloaded.
        first_issue = 96
        scrapeMain(index_page, target, first_issue)
        zipper.zip(target, target + '.cbz')
        # One-off direct-issue call kept for reference:
        # scrapeIssue('TPB', 'https://readcomiconline.to/Comic/Marvel-Zombies-2006/TPB?id=158840', 'Marvel_Zombies_(2006)/issue-TPB')
    except Exception as e:
        print(e)
def scrapeIssue(co_title: str, co_url: str, co_dir: str):
    """Download all page images of a single issue into *co_dir*.

    The reader page embeds its image URLs in inline JavaScript as
    ``lstImages.push("...");`` statements, so this scans the prettified HTML
    text for those calls instead of walking DOM nodes.
    """
    print("  " + co_title + ": " + co_url)
    if not os.path.exists(co_dir):
        os.makedirs(co_dir)
    co_html = zipper.get_html(co_url).prettify()
    img_idx = 0
    pos = co_html.find('lstImages.push')
    while pos != -1:
        co_html = co_html[pos:]
        # Isolate the quoted URL inside lstImages.push("...");
        raw = co_html[:co_html.find('");')]
        img_url = raw.replace('lstImages.push("', '').replace('")', '')
        img_title = "img-" + str(img_idx).zfill(3) + ".jpg"
        print(img_title + ": " + img_url)
        zipper.get_image(img_url, co_dir + "/" + img_title)
        img_idx += 1
        # Skip past this statement and search for the next push() call.
        co_html = co_html[co_html.find(';'):]
        pos = co_html.find('lstImages.push')
    # Throttle before returning to the caller's issue loop.
    # NOTE(review): the flattened original makes the sleep's placement
    # ambiguous (inside vs. after the loop) — confirm intended behavior.
    time.sleep(5)