def handle(self, *args, **options):
    # Re-run post-processing for a single already-downloaded chapter.
    if options["update"] and options["group"]:
        chapter = Chapter.objects.get(
            series__slug=options["series"],
            chapter_number=float(options["update"]),
            group=options["group"],
        )
        chapter_post_process(chapter)
    # Directly fetch one specific chapter from Jaiminis Box.
    elif options["dl"] == "jb" and options["series"] and options["latest_chap"]:
        latest_volume = (
            Volume.objects.filter(series__slug=options["series"])
            .order_by("-volume_number")[0]
            .volume_number
        )
        chapters = {
            str(chapter.chapter_number)
            for chapter in Chapter.objects.filter(
                series__slug=options["series"], group=self.jb_group
            )
        }
        self.jaiminis_box_checker(
            chapters,
            options["series"],
            latest_volume,
            self.jaiminisbox_manga[options["series"]],
            latest_chap=options["latest_chap"],
        )
    # Otherwise poll the configured sources for new chapters.
    else:
        if options["lookup"] in ("all", "jb"):
            for manga in self.jaiminisbox_manga:
                latest_volume = (
                    Volume.objects.filter(series__slug=manga)
                    .order_by("-volume_number")[0]
                    .volume_number
                )
                chapters = {
                    str(chapter.chapter_number)
                    for chapter in Chapter.objects.filter(
                        series__slug=manga, group=self.jb_group
                    )
                }
                self.jaiminis_box_checker(
                    chapters,
                    manga,
                    latest_volume,
                    self.jaiminisbox_manga[manga],
                )
        if options["lookup"] in ("all", "md"):
            for manga in self.mangadex_manga_id:
                latest_volume = (
                    Volume.objects.filter(series__slug=manga)
                    .order_by("-volume_number")[0]
                    .volume_number
                )
                # Special case: match chapters from any group for this series.
                if manga == "Kaguya-Wants-To-Be-Confessed-To":
                    chapters = [
                        str(chapter.chapter_number)
                        for chapter in Chapter.objects.filter(series__slug=manga)
                    ]
                else:
                    chapters = [
                        str(chapter.chapter_number)
                        for chapter in Chapter.objects.filter(
                            series__slug=manga, group=self.md_group
                        )
                    ]
                self.mangadex_checker(chapters, manga, latest_volume)
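# handle() above reads options named "series", "update", "group", "dl",
# "latest_chap", and "lookup" from the parsed command line (and, throughout
# these excerpts, requests, BeautifulSoup, the Django models, and
# chapter_post_process are assumed to be imported at module level). The
# command's real add_arguments is not shown here; the sketch below is a
# hypothetical reconstruction that registers each option as an optional
# string flag.
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument("--series", help="Series slug to act on.")
        parser.add_argument("--update", help="Chapter number to re-run post-processing for.")
        parser.add_argument("--group", help="Group primary key, used with --update.")
        parser.add_argument("--dl", help="Source to download a single chapter from, e.g. 'jb'.")
        parser.add_argument("--latest_chap", help="Chapter number to fetch directly.")
        parser.add_argument("--lookup", help="Sources to poll for new chapters: 'all', 'jb', or 'md'.")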
def mangadex_download(self, chapters, series, group, latest_volume, url=""):
    # chapters maps chapter number -> (title, page URLs); a falsy value means
    # the chapter's metadata could not be fetched upstream.
    for chapter in chapters:
        if not chapters[chapter]:
            print(f"Could not download chapter {chapter}.")
            continue
        chapter_pages = chapters[chapter][1]
        chapter_folder, group_folder = self.create_chapter_obj(
            chapter, group, series, latest_volume, chapters[chapter][0]
        )
        ch = Chapter.objects.get(
            series=series, chapter_number=float(chapter), group=group
        )
        padding = len(str(len(chapter_pages)))
        print(f"Downloading chapter {chapter}...")
        print(f"Found {len(chapter_pages)} pages...")
        for idx, page in enumerate(chapter_pages):
            extension = page.rsplit(".", 1)[1]
            page_file = f"{str(idx + 1).zfill(padding)}.{extension}"
            resp = requests.get(page)
            if resp.status_code == 200:
                with open(
                    os.path.join(chapter_folder, group_folder, page_file), "wb"
                ) as f:
                    f.write(resp.content)
            else:
                print("failed at mangadex_download", idx, page)
        chapter_post_process(ch, update_version=False)
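# mangadex_download indexes each entry as chapters[chapter][0] (the title)
# and chapters[chapter][1] (the page URLs), so the checker presumably passes
# a mapping shaped like this hypothetical example (URLs and numbers are made
# up for illustration):
chapters_example = {
    "101": (
        "Title of chapter 101",
        [
            "https://example.invalid/101/1.png",
            "https://example.invalid/101/2.png",
        ],
    ),
    "102": None,  # falsy entry: metadata fetch failed, chapter is skipped
}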
def download_source_chapter(
    self,
    title: str,
    chapter_number: float,
    series: Series,
    group: Group,
    latest_volume: int,
    md_chapter_id: int,
    md_chapter_data: Optional[Dict[str, Any]] = None,
):
    # Fetch chapter metadata from MangaDex unless the caller already has it.
    if not md_chapter_data:
        md_chapter_data = self.get_source_chapter_data(md_chapter_id)
        if not md_chapter_data:
            return None
    md_chapter_pages = md_chapter_data["pages"]
    if not md_chapter_pages:
        print(
            f"Failed to get chapter pages of md chapter {chapter_number} "
            f"with id: {md_chapter_id} on MangaDex for {series.slug}.",
            group.name,
        )
        return
    ch_obj, chapter_folder, group_folder, is_update = create_chapter_obj(
        chapter_number, group, series, latest_volume, title
    )
    # Record a hash of the source metadata on the chapter object.
    ch_obj.scraper_hash = self.generate_source_chapter_hash(md_chapter_data)
    ch_obj.save()
    padding = len(str(len(md_chapter_pages)))
    print(
        f"Downloading chapter: {chapter_number} group: {group.name} "
        f"series: {series.name} from MangaDex..."
    )
    print(f"Found {len(md_chapter_pages)} pages...")
    for idx, page in enumerate(md_chapter_pages):
        extension = page.rsplit(".", 1)[1]
        page_file = f"{str(idx + 1).zfill(padding)}.{extension}"
        resp = requests.get(page)
        if resp.status_code == 200:
            with open(
                os.path.join(chapter_folder, group_folder, page_file), "wb"
            ) as f:
                f.write(resp.content)
        else:
            print("failed at download_source_chapter", idx, page)
    chapter_post_process(ch_obj, is_update=is_update)
    return ch_obj
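# Worked example of the filename-padding scheme shared by both downloaders:
# page indices are zero-filled to the width of the page count so the saved
# filenames sort lexicographically in page order.
pages = [f"p{n}.png" for n in range(12)]  # 12 pages -> padding width 2
padding = len(str(len(pages)))
print(f"{str(1).zfill(padding)}.png")   # -> 01.png
print(f"{str(12).zfill(padding)}.png")  # -> 12.png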
def handle(self, *args, **options):
    for chapter in Chapter.objects.all():
        chapter_post_process(chapter, update_version=False)
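# The loop above caches every Chapter row in memory as the queryset is
# evaluated. If the table is large, Django's queryset .iterator() streams
# rows instead; a possible variant, identical in behavior apart from memory
# use:
for chapter in Chapter.objects.all().iterator():
    chapter_post_process(chapter, update_version=False)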
def jaiminis_box_checker(
    self, downloaded_chapters, series, latest_volume, url, latest_chap=None
):
    chapters = {}
    group = Group.objects.get(pk=self.jb_group)
    series = Series.objects.get(slug=series)
    if not latest_chap:
        # Scrape the series page for chapters we don't have yet.
        print(url)
        resp = requests.get(url)
        if resp.status_code == 200:
            soup = BeautifulSoup(resp.text, "html.parser")
            for chapter in soup.select(".list .group .element"):
                chapter_regex = re.search(
                    r"^Chapter (\d*\.?\d*): (.*)$",
                    chapter.select(".title")[0].text,
                )
                chap_numb = chapter_regex.group(1)
                # Skip chapters that are already downloaded or blacklisted.
                if str(float(chap_numb)) in downloaded_chapters or (
                    self.blacklist_jb
                    and str(float(chap_numb)) in self.blacklist_jb[series.slug]
                ):
                    continue
                print(
                    f"Found new chapter ({chap_numb}) on Jaiminisbox for {series}."
                )
                chapter_dl_url = chapter.select(".icon_wrapper a")[0]["href"]
                chapters[chap_numb] = {
                    "title": chapter_regex.group(2),
                    "url": chapter_dl_url,
                }
        else:
            print(
                f"[{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}] "
                f"Failed to reach JB page for {series}. "
                f"Response status: {resp.status_code}"
            )
    else:
        # Fetch one specific chapter directly by its number.
        latest_chap_slug = latest_chap.replace(".", "/")
        resp = requests.get(
            f"https://jaiminisbox.com/reader/read/{series.slug.lower()}"
            f"/en/0/{latest_chap_slug}/page/1"
        )
        if resp.status_code == 200:
            soup = BeautifulSoup(resp.text, "html.parser")
            title = soup.select(".tbtitle .text a")[1].text.split(":", 1)[1].strip()
            chapters[str(latest_chap)] = {
                "title": title,
                "url": f"https://jaiminisbox.com/reader/download/"
                f"{series.slug.lower()}/en/0/{latest_chap_slug}/",
            }
        else:
            print(resp.status_code)

    for chapter in chapters:
        chapter_url = chapters[chapter]["url"]
        # A "(Digital)" re-release of a whole-number chapter we already have
        # replaces that chapter: blacklist the fractional listing number and
        # treat the download as an update of the existing chapter.
        if "(Digital)" in chapters[chapter]["title"] and str(
            float(int(float(chapter)))
        ) in downloaded_chapters:
            if hasattr(settings, "SCRAPER_BLACKLIST_FILE") and os.path.exists(
                settings.SCRAPER_BLACKLIST_FILE
            ):
                with open(settings.SCRAPER_BLACKLIST_FILE, "r+") as f:
                    blacklist = json.load(f)
                    f.seek(0)
                    f.truncate()
                    blacklist.append(str(chapter))
                    json.dump(blacklist, f)
            chapter = str(float(int(float(chapter))))
        if str(float(chapter)) not in downloaded_chapters:
            reupdating = False
            chapter_folder, group_folder = self.create_chapter_obj(
                chapter, group, series, latest_volume, chapters[chapter]["title"]
            )
            ch = Chapter.objects.get(
                series=series, group=self.jb_group, chapter_number=float(chapter)
            )
            print(f"Downloading chapter {chapter}...")
        else:
            # Chapter already exists: wipe its pages and re-download in place.
            reupdating = True
            ch = Chapter.objects.get(
                series=series, group=self.jb_group, chapter_number=float(chapter)
            )
            chapter_folder = os.path.join(
                settings.MEDIA_ROOT, "manga", series.slug, "chapters", ch.folder
            )
            group_folder = str(self.jb_group)
            print(f"Reupdating chapter pages for {chapter}...")
            for f in os.listdir(os.path.join(chapter_folder, group_folder)):
                os.remove(os.path.join(chapter_folder, group_folder, f))
        resp = requests.get(chapter_url)
        if resp.status_code == 200:
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zip_file:
                all_pages = sorted(zip_file.namelist())
                padding = len(str(len(all_pages)))
                for idx, page in enumerate(all_pages):
                    extension = page.rsplit(".", 1)[1]
                    page_file = f"{str(idx + 1).zfill(padding)}.{extension}"
                    with open(
                        os.path.join(chapter_folder, group_folder, page_file),
                        "wb",
                    ) as f:
                        f.write(zip_file.read(page))
            chapter_post_process(ch, update_version=reupdating)
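# Worked example of the "(Digital)" normalization above: a digital
# re-release listed as, say, chapter "120.5" collapses to the whole-number
# chapter it replaces, and the fractional number is appended to the JSON
# blacklist file (which, judging by the json.load / list.append round-trip,
# is a flat JSON array of chapter-number strings).
chapter = "120.5"
print(str(float(int(float(chapter)))))  # -> "120.0"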