def parse_site(self, url):
    """Scrape a Starkana series page and populate self.chapters.

    Fills self.chapters with (chapter_url, title, number) tuples and
    self.chapters_to_download with the indices the user (or auto mode)
    selected.

    Raises:
        MangaNotFound: the page yielded no chapters.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = Starkana.re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if not self.chapters:
        raise self.MangaNotFound
    lower_range = 0
    for i in range(0, len(self.chapters)):
        # BUG FIX: the URL was assembled as path + base_url, which yields
        # garbage like "/chapter/1http://...". The base URL must come first,
        # matching every other site plugin in this file.
        # (Assumes regex group 0 is the relative chapter path — consistent
        # with the MangaReader/EatManga plugins; confirm against the regex.)
        self.chapters[i] = ('%s%s' % (self.base_url, self.chapters[i][0]),
                            self.chapters[i][2],
                            self.chapters[i][2])
        if not self.options.auto:
            print('(%i) %s' % (i + 1, self.chapters[i][1]))
        else:
            # Remember where to resume from in auto mode.
            if self.options.lastDownloaded == self.chapters[i][1]:
                lower_range = i + 1
    upper_range = len(self.chapters)
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Scrape a MangaReader series page and populate self.chapters.

    Each entry becomes (absolute_url, display_title, chapter_number).
    In auto mode, remembers the position just past lastDownloaded;
    otherwise prints a numbered menu and asks the user to pick.

    Raises:
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = MangaReader.re_get_chapters.findall(source)
    lower_range = 0
    for idx in range(len(self.chapters)):
        raw = self.chapters[idx]
        # Strip the series name off the title to get the bare chapter number.
        number = raw[1].replace(self.options.manga, '').strip()
        entry = ('%s%s' % (self.base_url, raw[0]),
                 '%s%s' % (number, raw[2]),
                 number)
        self.chapters[idx] = entry
        if self.options.auto:
            # Resume just past the last chapter we already downloaded.
            if self.options.lastDownloaded == entry[1].decode('utf-8'):
                lower_range = idx + 1
        else:
            print('(%i) %s' % (idx + 1, entry[1]))
    upper_range = len(self.chapters)
    if self.options.auto:
        if lower_range == upper_range:
            raise self.NoUpdates
        self.chapters_to_download.extend(range(lower_range, upper_range))
    else:
        self.chapters_to_download = self.select_chapters(self.chapters)
    return
def get_manga_url(self):
    """Resolve the series URL via MangaReader's alphabetical index page."""
    listing_url = '%s/alphabetical' % self.base_url
    source = get_source_code(listing_url, self.options.proxy)
    # Only consider markup from the 'series_col' marker onward, so the
    # regex doesn't match navigation links above the series listing.
    listing = source[source.find('series_col'):]
    all_series = MangaReader.re_get_series.findall(listing)
    keyword = self.select_from_results(all_series)
    return '%s%s' % (self.base_url, keyword)
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """We ignore max_pages, because you can't regex-search that under Batoto.

    Page URLs come from the chapter page's <select id="page_select">
    dropdown; each <option>'s value is the page URL.
    """
    markup = get_source_code(url, self.options.proxy)
    page_options = BeautifulSoup(markup).find("select", id="page_select")("option")
    for page_number, option in enumerate(page_options, start=1):
        self.download_image(page_number, option['value'], manga_chapter_prefix,
                            max_pages, current_chapter)
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """Download every page of one EatManga chapter."""
    source = get_source_code(url, self.options.proxy)
    matches = EatManga.re_get_page.findall(source)
    # The page can repeat links; OrderedDict.fromkeys de-duplicates while
    # preserving first-seen order.
    for match in list(OrderedDict.fromkeys(matches)):
        absolute_url = 'http://eatmanga.com%s' % match[0]
        self.download_image(match[1], absolute_url, manga_chapter_prefix,
                            max_pages, current_chapter)
def get_manga_url(self):
    """Search Batoto for self.options.manga and return the chosen series URL.

    Parses the search-results table; each usable row yields a
    (url, utf8_title) pair that is offered to the user via
    select_from_results().

    Raises:
        MangaNotFound: the search returned no parseable results.
    """
    url = "{}/search?name={}&name_cond=c".format(self.base_url,
                                                 '+'.join(self.options.manga.split()))
    s = get_source_code(url, self.options.proxy)
    soup = BeautifulSoup(s)
    a = soup.find("div", id="comic_search_results")
    r = a.tbody.find_all("tr")[1:]  # skip the header row
    seriesl = []
    for i in r:
        # Best-effort row parse: rows that don't match the expected markup
        # (missing anchor/image, absent href) are skipped.
        # BUG FIX: was a bare `except: pass`, which under Python 2 also
        # swallows KeyboardInterrupt/SystemExit and hides real errors;
        # narrowed to the failures this lookup chain can actually produce.
        try:
            e = i.td.findAll('a')[1]
            u = e['href']
            t = e.img.next_sibling[1:]
            seriesl.append((u, t.encode('utf-8')))
        except (AttributeError, TypeError, IndexError, KeyError):
            continue
    if not seriesl:
        # signifies no manga found
        raise self.MangaNotFound("Nonexistent.")
    url = self.select_from_results(seriesl)
    return url
def parse_site(self, url):
    """Scrape an EatManga series page and populate self.chapters.

    Entries not yet released (URL contains 'upcoming') are discarded.
    Each remaining entry becomes (absolute_url, number, number).

    Raises:
        MangaNotFound: the page yielded no chapters.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = EatManga.re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if not self.chapters:
        raise self.MangaNotFound
    # BUG FIX: the old code did `del self.chapters[i]` inside a
    # `for i in range(0, len(self.chapters))` loop. Deleting shifts the
    # following element into slot i (which `continue` then skipped), and the
    # precomputed range eventually indexed past the shortened list,
    # raising IndexError. Filter the not-yet-available chapters up front.
    self.chapters = [ch for ch in self.chapters if 'upcoming' not in ch[0]]
    lower_range = 0
    for i in range(0, len(self.chapters)):
        # Strip the series name off the title to get the bare chapter number.
        chapter_number = self.chapters[i][2].replace(self.options.manga, '').strip()
        self.chapters[i] = ('%s%s' % (self.base_url, self.chapters[i][0]),
                            chapter_number,
                            chapter_number)
        if not self.options.auto:
            print('(%i) %s' % (i + 1, self.chapters[i][1]))
        else:
            # Remember where to resume from in auto mode.
            if self.options.lastDownloaded == self.chapters[i][1]:
                lower_range = i + 1
    upper_range = len(self.chapters)
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """Download every page of one MangaReader chapter."""
    source = get_source_code(url, self.options.proxy)
    matches = MangaReader.re_get_page.findall(source)
    for match in matches:
        # match[0] is the relative page path, match[1] the page number.
        absolute_url = 'http://www.mangareader.net' + match[0]
        self.download_image(match[1], absolute_url, manga_chapter_prefix,
                            max_pages, current_chapter)
def parse_site(self, url):
    """Locate the MangaHere series page and populate self.chapters.

    Tries *url* (a guessed series URL) first; if that page is missing,
    falls back to the site search (a begins-with pass, then a contains
    pass). Fills self.chapters with (chapter_url, title, number) tuples
    and self.chapters_to_download with the selected indices.

    Raises:
        MangaNotFound: the series cannot be resolved via search.
        MangaLicenced: the series has been removed from the site.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    if source is None or 'the page you have requested can' in source:
        # Guess failed — do a 'begins-with' search, then a 'contains' search
        url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
        try:
            source = get_source_code(url, self.options.proxy)
            # The site rate-limits searches; back off once and retry below.
            if 'Sorry you have just searched, please try 5 seconds later.' in source:
                print('Searched too soon, waiting 5 seconds...')
                time.sleep(5)
            series_results = []
            if source is not None:
                series_results = MangaHere.re_get_series.findall(source)
            if 0 == len(series_results):
                # Retry the same search (covers the rate-limited case above).
                url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaHere.re_get_series.findall(source)
        # 0 results
        except AttributeError:
            raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
        else:
            keyword = self.select_from_results(series_results)
            url = '%s/manga/%s/' % (self.base_url, keyword)
            source = get_source_code(url, self.options.proxy)
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga, '_', remove_special_chars=True, lower_case=True, use_ignore_chars=False)
    # other check for manga removal if our initial guess for the name was wrong
    if 'it is not available in' in source or "It's not available in" in source:
        raise self.MangaLicenced('It has been removed.')
    # that's nice of them
    # url = 'http://www.mangahere.com/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)
    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False
    # can't pre-compile this because relies on class name
    # First try the volume+chapter URL scheme (/vNN/cNN/)...
    re_get_chapters = re.compile(
        'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # ...then fall back to the chapter-only scheme (/cNN/).
        is_chapter_only = True
        re_get_chapters = re.compile(
            'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
        self.chapters = re_get_chapters.findall(source)
    # Sort chapters by volume and chapter number. Needed because next chapter isn't always accurate.
    self.chapters = sorted(self.chapters, cmp=self.chapter_compare)
    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.auto:
                # Resume just past the last chapter we already downloaded.
                if self.options.lastDownloaded == self.chapters[i][0]:
                    lower_range = i + 1
            # Keep only the digits/dots of e.g. 'c012.5' for the number field.
            ch_number = self.re_non_decimal.sub('', self.chapters[i][0])
            self.chapters[i] = (
                '%s/manga/%s/%s' % (self.base_url, keyword, self.chapters[i][0]),
                self.chapters[i][0],
                ch_number)
    else:
        for i in range(0, len(self.chapters)):
            ch_number = self.re_non_decimal.sub('', self.chapters[i][1])
            # Title is 'vNN.cNN'; URL includes both volume and chapter parts.
            self.chapters[i] = (
                '%s/manga/%s/%s/%s' % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                ch_number)
            if self.options.auto:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1
    upper_range = len(self.chapters)
    # Validate whether the last chapter is available
    source = get_source_code(self.chapters[upper_range - 1][0], self.options.proxy)
    if ('not available yet' in source) or ('Sorry, the page you have requested can’t be found' in source):
        # If the last chapter is not available remove it from the list
        del self.chapters[upper_range - 1]
        upper_range -= 1
    # which ones do we want?
    if not self.options.auto:
        for i in range(0, upper_range):
            if is_chapter_only:
                print('(%i) %s' % (i + 1, self.chapters[i][0]))
            else:
                print('(%i) %s' % (i + 1, self.chapters[i][1]))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Locate the MangaFox series page and populate self.chapters.

    Tries *url* (a guessed series URL) first, detecting redirects; if that
    fails, falls back to the site search (a begins-with pass, then a
    contains pass). Fills self.chapters with (chapter_url, title, number)
    tuples and self.chapters_to_download with the selected indices.

    Raises:
        MangaNotFound: the series cannot be resolved, or was removed.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source, redirect_url = get_source_code(url, self.options.proxy, True)
    # A redirect means the guessed URL didn't hit the series page directly.
    if redirect_url != url or source is None or "the page you have requested cannot be found" in source:
        # Could not find the manga page by guessing
        # Use the website search
        # name_method=bw is a 'begins-with' search.
        url = "%s/search.php?name_method=bw&name=%s&is_completed=&advopts=1" % (
            self.base_url,
            "+".join(self.options.manga.split()),
        )
        if self.options.verbose_FLAG:
            print(url)
        try:
            source = get_source_code(url, self.options.proxy)
            series_results = []
            if source is not None:
                series_results = MangaFox.re_get_series.findall(source)
            if 0 == len(series_results):
                # name_method=cw is the broader 'contains' search.
                url = "%s/search.php?name_method=cw&name=%s&is_completed=&advopts=1" % (
                    self.base_url,
                    "+".join(self.options.manga.split()),
                )
                if self.options.verbose_FLAG:
                    print(url)
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaFox.re_get_series.findall(source)
        # 0 results
        except AttributeError:
            raise self.MangaNotFound("It doesn't exist, or cannot be resolved by autocorrect.")
        else:
            keyword = self.select_from_results(series_results)
            if self.options.verbose_FLAG:
                print("Keyword: %s" % keyword)
            # NOTE: base_url is used as a format string here (contains a %s
            # placeholder for the series keyword).
            url = self.base_url % keyword
            if self.options.verbose_FLAG:
                print("URL: %s" % url)
            source = get_source_code(url, self.options.proxy)
            if source is None:
                raise self.MangaNotFound("Search Failed to find Manga.")
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga)
        if self.options.verbose_FLAG:
            print("Keyword: %s" % keyword)
    if "it is not available in Manga Fox." in source:
        raise self.MangaNotFound("It has been removed.")
    # that's nice of them
    # url = 'http://mangafox.me/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)
    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False
    # can't pre-compile this because relies on class name
    # First try the volume+chapter URL scheme (/vNN/cNN/)...
    re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        if self.options.verbose_FLAG:
            print("Trying chapter only regex")
        # ...then fall back to the chapter-only scheme (/cNN/).
        is_chapter_only = True
        re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title' % keyword)
        self.chapters = re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s" % self.chapters[i])
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i]))
            else:
                # Resume just past the last chapter we already downloaded.
                if self.options.lastDownloaded == self.chapters[i]:
                    lower_range = i + 1
            self.chapters[i] = (
                "%s/manga/%s/%s" % (self.base_url, keyword, self.chapters[i]),
                self.chapters[i],
                self.chapters[i],
            )
    else:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
            # Title is 'vNN.cNN'; URL includes both volume and chapter parts.
            self.chapters[i] = (
                "%s/manga/%s/%s/%s" % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                self.chapters[i][1],
            )
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i][1]))
            else:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1
    upper_range = len(self.chapters)
    # which ones do we want?
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Scrape a Batoto series page and populate self.chapters.

    Batoto lists multiple scanlation groups' releases of the same chapter;
    this method buckets rows into per-chapter lists, then keeps exactly one
    release per chapter, preferring continuity with the previously chosen
    release and falling back to the site's own 'Next Chapter' link when the
    choice is ambiguous.

    Raises:
        MangaLicenced: no English chapters were found on the page.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    s = get_source_code(url, self.options.proxy)
    soup = BeautifulSoup(s)
    t = soup.find("table", class_="chapters_list").tbody
    cl = t.find_all("tr", class_="lang_English")
    # self.chapters is (for now) a list of buckets; each bucket collects the
    # releases believed to belong to one chapter.
    self.chapters = [[]]
    cnum = self.chapters[0]
    for i in cl:
        u = i.td.a['href']  # release URL
        # NOTE: rebinds `t` (was the table above) to the release title text.
        t = i.td.a.img.next_sibling[1:]
        g = i.find_all("td")[2].get_text().strip()  # scanlation group name
        try:
            # Chapter number parsed out of the URL; 'ch12.0' normalizes to '12'.
            c = float(re.search("ch([\d.]+)", u).group(1))
            c = str(int(c)) if c.is_integer() else str(c)
        except AttributeError:
            # URL has no chNN component — treat as chapter '0'.
            c = '0'
        tu = (u, t, c, g)  # indices: 0=url, 1=title, 2=chapter, 3=group
        # NOTE(review): this compares the bucket's group name (index 3) with
        # the new row's chapter number `c`; bucketing by chapter would use
        # index 2. Looks like a possible index mix-up (the selection logic
        # below also uses n[2] where group continuity would suggest n[3]) —
        # confirm against live page data before changing.
        if len(cnum) == 0 or cnum[0][3] == c:
            cnum.append(tu)
        else:
            # Start a new bucket for the next chapter.
            self.chapters.append([])
            cnum = self.chapters[-1]
            cnum.append(tu)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if self.chapters == [[]]:
        raise self.MangaLicenced('It has been removed.')
    # Look for first chapter that should be downloaded in auto mode
    lower_range = 0
    if self.options.auto:
        for i in range(0, len(self.chapters)):
            if self.options.lastDownloaded == self.chapters[i][0][1]:
                lower_range = i + 1
    # Collapse each bucket to a single release. `sc` tracks the release
    # selected for the previous chapter.
    sc = None
    for i in self.chapters:
        if len(i) == 1 or sc is None:
            # Unambiguous (or first) bucket: take its first release.
            sc = i[0]
            del i[1:]
            continue
        # Candidates consistent with the previous selection (by index 2).
        ll = [n for n in i if n[2] == sc[2]]
        if len(ll) != 1:
            # Ambiguous: trust the site's own 'Next Chapter' link from the
            # previously selected release.
            c = self.get_next_url(sc[0])
            i[0] = [n for n in i if n[0] == c][0]
            if self.options.verbose_FLAG:
                print("Anomaly at chapter {} ({} matches, chose {})".format(i[0][3], len(ll), i[0][2]))
            del i[1:]
            sc = i[0]
            continue
        i[0] = ll[0]
        sc = i[0]
        del i[1:]
    # Flatten: one chosen release tuple per chapter.
    self.chapters = [i[0] for i in self.chapters]
    upper_range = len(self.chapters)
    # which ones do we want?
    if not self.options.auto:
        for n, c in enumerate(self.chapters):
            print("{:03d}. {}".format(n + 1, c[1].encode('utf-8')))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def get_next_url(self, c):
    """Return the href of the 'Next Chapter' link on chapter page *c*."""
    markup = get_source_code(c, self.options.proxy)
    # The next-chapter arrow is an <img title="Next Chapter"> wrapped in
    # the anchor we want.
    anchor = BeautifulSoup(markup).find("img", title="Next Chapter").parent
    return anchor['href']