def get_most_populars(self):
    """
    Returns best noted manga list
    """
    r = self.session_get(self.most_populars_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'lxml')

    results = []
    for tr_element in soup.find('table', class_='table').tbody.find_all('tr', recursive=False):
        a_element = tr_element.find_all('td')[2].a
        a_element.span.decompose()
        results.append(dict(
            name=a_element.text.strip(),
            slug=a_element.get('href').split('/')[-1],
        ))

    return results

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages are expected.
    """
    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    pages_imgs = soup.find('div', class_='container-chapter-reader').find_all('img')

    data = dict(
        pages=[],
    )
    for img in pages_imgs:
        data['pages'].append(dict(
            slug=None,  # slug can't be used to forge image URL
            image=img.get('src'),
        ))

    return data

def get_most_populars(self):
    """
    Returns TOP 10 manga
    """
    r = self.session_get(self.most_populars_url.format(LANGUAGES_CODES[self.lang]))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for li_element in soup.find('ul', class_='lst_type1').find_all('li'):
        split_url = urlsplit(li_element.a.get('href'))
        url = '{0}?{1}'.format(split_url.path, split_url.query)
        slug = split_url.query.split('=')[-1]

        results.append(dict(
            slug=slug,
            url=url,
            name=li_element.a.find('p', class_='subj').text.strip(),
        ))

    return results

def search_by_type(self, term, type):
    assert type in ('CHALLENGE', 'WEBTOON', ), 'Invalid type'

    r = self.session_get(self.search_url, params=dict(keyword=term, type=type))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    if type == 'CHALLENGE':
        a_elements = soup.find_all('a', class_='challenge_item')
    elif type == 'WEBTOON':
        a_elements = soup.find_all('a', class_='card_item')

    results = []
    for a_element in a_elements:
        # Small difference here compared to other servers:
        # the slug can't be used to forge the manga URL, so we must store the full URL (relative)
        results.append(dict(
            slug=a_element.get('href').split('=')[-1],
            url=a_element.get('href'),
            name=a_element.find('p', class_='subj').text.strip(),
        ))

    return results

def get_mangas(self, page=1):
    r = self.session_get('{0}/{1}'.format(self.mangas_url, page))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for element in soup.find('div', class_='series').find_all('div', class_='group'):
        a_element = element.find('div', class_='title').a
        results.append(dict(
            slug=a_element.get('href').split('/')[-2],
            name=a_element.get('title'),
        ))

    return results

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages are expected.
    """
    r = self.session_get(self.chapter_url.format(chapter_url), headers={'user-agent': USER_AGENT})
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    imgs = soup.find('div', id='_imageList').find_all('img')

    data = dict(
        pages=[],
    )
    for img in imgs:
        data['pages'].append(dict(
            slug=None,  # slug can't be used to forge image URL
            image=img.get('data-url').strip(),
        ))

    return data

def get_manga_chapter_page_image(self, manga_slug, manga_name, chapter_slug, page):
    """
    Returns chapter page scan (image) content
    """
    r = self.session_get(self.page_url.format(page['slug']))
    if r is None:
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    try:
        url = soup.find('img', id='balloonsimg').get('src')
    except Exception:
        # Image URL is embedded in the `background-image` CSS property of a <div>
        url = soup.find('div', id='balloonsimg').get('style').split(';')[0].split(':')[1][4:-1]

    r = self.session_get(self.base_url + url)
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if not mime_type.startswith('image'):
        return None

    return dict(
        buffer=r.content,
        mime_type=mime_type,
        name='{0}.png'.format(page['slug']),
    )

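# Illustrative sketch, not part of the scraper above: the fallback branch slices
# a "background-image:url(...)" declaration by fixed offsets. A regex-based
# helper (hypothetical name) extracts the same URL more defensively.
import re

def extract_background_image_url(style):
    """Return the url(...) value of a background-image declaration, or None."""
    match = re.search(r"background-image\s*:\s*url\(['\"]?([^'\")]+)['\"]?\)", style)
    return match.group(1) if match else None

# extract_background_image_url("background-image:url('/img/p1.png');") -> '/img/p1.png'
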
def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping manga HTML page content
    """
    r = self.session_get(self.manga_url.format(manga_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    data = dict(
        pages=[],
    )
    for a_element in soup.find('div', class_='chapters', ch=chapter_slug).p.find_all('a'):
        data['pages'].append(dict(
            slug=a_element.get('href')[:-5].split('-')[-1],
            image=None,
        ))

    return data

def get_manga_chapter_page_image(self, manga_slug, manga_name, chapter_slug, page):
    """
    Returns chapter page scan (image) content
    """
    r = self.session_get(
        self.page_url.format(manga_slug, self.decode_chapter_slug(chapter_slug), page['slug']))
    if r is None or r.status_code != 200:
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    url = soup.find('img', class_='CurImage').get('src')

    r = self.session_get(url)
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if not mime_type.startswith('image'):
        return None

    return dict(
        buffer=r.content,
        mime_type=mime_type,
        name=url.split('/')[-1],
    )

def get_most_populars(self):
    """
    Returns most popular manga list
    """
    r = self.session_post(self.search_url, data=dict(page=1, sortBy='popularity', sortOrder='descending'))
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if mime_type != 'text/plain':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for a_element in soup.find_all('a', class_='resultLink'):
        results.append(dict(
            name=a_element.text.strip(),
            slug=a_element.get('href').split('/')[-1],
        ))

    return results

def search(self, term):
    r = self.session_get(self.api_search_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'application/octet-stream':
        return None

    resp_data = MangaplusResponse.loads(r.content)
    if resp_data.error:
        return None

    results = []
    term = unidecode.unidecode(term).lower()
    for title in resp_data.success.titles_all.titles:
        if title.language != LANGUAGES_CODES[self.lang]:
            continue
        if term not in unidecode.unidecode(title.name).lower():
            continue

        results.append(dict(
            slug=title.id,
            name=title.name,
            cover=title.portrait_image_url,
        ))

    return results

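# Illustrative sketch, not from the server code: search() above matches titles
# accent- and case-insensitively by folding both strings to ASCII with unidecode
# before a substring test.
import unidecode

def title_matches(term, name):
    """True if `term` occurs in `name`, ignoring case and accents."""
    return unidecode.unidecode(term).lower() in unidecode.unidecode(name).lower()

# title_matches('pokemon', 'Pokémon Adventures') -> True
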
def get_most_populars(self):
    """
    Returns hottest manga list
    """
    r = self.session_get(self.api_most_populars_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'application/octet-stream':
        return None

    resp_data = MangaplusResponse.loads(r.content)
    if resp_data.error:
        return None

    results = []
    for title in resp_data.success.titles_ranking.titles:
        if title.language != LANGUAGES_CODES[self.lang]:
            continue

        results.append(dict(
            slug=title.id,
            name=title.name,
            cover=title.portrait_image_url,
        ))

    return results

def get_manga_chapter_page_image(self, manga_slug, manga_name, chapter_slug, page):
    """
    Returns chapter page scan (image) content
    """
    r = self.session_get(page['image'])
    if r is None or r.status_code != 200:
        return None

    if page['encryption_key'] is not None:
        # Decryption
        key_stream = [int(v, 16) for v in RE_ENCRYPTION_KEY.findall(page['encryption_key'])]
        block_size_in_bytes = len(key_stream)

        content = bytes([
            int(v) ^ key_stream[index % block_size_in_bytes]
            for index, v in enumerate(r.content)
        ])
    else:
        content = r.content

    mime_type = get_buffer_mime_type(content)
    if not mime_type.startswith('image'):
        return None

    return dict(
        buffer=content,
        mime_type=mime_type,
        name=page['image'].split('?')[0].split('/')[-1],
    )

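# Illustrative sketch of the decryption scheme above (RE_ENCRYPTION_KEY is
# assumed to split a hex string into byte-sized chunks): pages are XOR-encrypted
# with a repeating key stream, so applying the same XOR twice restores the data.
import re

RE_HEX_BYTES = re.compile('.{1,2}')  # hypothetical stand-in for RE_ENCRYPTION_KEY

def xor_decrypt(content, hex_key):
    """XOR `content` with the repeating key stream derived from `hex_key`."""
    key_stream = [int(v, 16) for v in RE_HEX_BYTES.findall(hex_key)]
    return bytes(b ^ key_stream[i % len(key_stream)] for i, b in enumerate(content))

# Round trip: xor_decrypt(xor_decrypt(data, 'a1b2c3'), 'a1b2c3') == data
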
def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data from API

    Currently, only pages are expected.
    """
    r = self.session_get(self.api_chapter_url.format(chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'application/octet-stream':
        return None

    resp = MangaplusResponse.loads(r.content)
    if resp.error:
        return None

    resp_data = resp.success.manga_viewer

    data = dict(
        pages=[],
    )
    for page in resp_data.pages:
        if page.page is None:
            continue

        data['pages'].append(dict(
            slug=None,
            image=page.page.image_url,
            encryption_key=page.page.encryption_key,
        ))

    return data

def get_manga_chapter_page_image(self, manga_slug, manga_name, chapter_slug, page):
    """
    Returns chapter page scan (image) content
    """
    if page.get('image'):
        r = self.session_get(self.image_url.format(page['image']))
        name = page['image']
    else:
        # No image: generate a placeholder image containing the page's text
        r = self.session_get(
            'https://fakeimg.pl/1500x2126/ffffff/000000/',
            params=dict(
                text='\n'.join(textwrap.wrap(page['text'], 25)),
                font_size=64,
                font='museo'
            )
        )
        name = '{0}-alt-text.png'.format(chapter_slug)

    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if not mime_type.startswith('image'):
        return None

    return dict(
        buffer=r.content,
        mime_type=mime_type,
        name=name,
    )

def search(self, term=None):
    r = self.session_get(self.search_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for a_element in soup.find('div', class_='h-left').find_all('a'):
        name = a_element.find('div', class_='hmi-titre').text.strip()
        if term is None or unidecode.unidecode(term).lower() in unidecode.unidecode(name).lower():
            results.append(dict(
                slug=a_element.get('href').split('/')[-1],
                name=name,
            ))

    return results

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content
    """
    manga_slug = manga_name.replace(' ', '_')

    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)

    if 'leitor' not in r.url:
        # Chapter page doesn't exist, we have been redirected to manga page
        return None
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    data = dict(
        pages=[],
    )
    for img_element in soup.find_all('img', class_='img-manga'):
        url = img_element.get('src')
        if f'{manga_name}/{chapter_slug}' not in url:
            continue

        data['pages'].append(dict(
            slug=None,
            image=url.split('/')[-1],
        ))

    return data

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages (list of image filenames) are expected.
    """
    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    pages_imgs = soup.find('div', id='all').find_all('img')

    data = dict(
        pages=[],
    )
    for img in pages_imgs:
        data['pages'].append(dict(
            slug=None,  # not necessary, we know the image URL directly
            image=img.get('data-src').strip().split('/')[-1],
        ))

    return data

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages are expected.
    """
    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)

    if r.url[:-1] == self.base_url:
        # Chapter page doesn't exist, we have been redirected to homepage
        return None
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    imgs_elements = soup.find('div', class_='main_img').find_all('img')

    data = dict(
        pages=[],
    )
    for img_element in imgs_elements:
        url = img_element.get('data-src')
        if not url or not url.startswith('lel'):
            continue

        data['pages'].append(dict(
            slug=None,
            image=url,
        ))

    return data

def get_most_populars(self):
    """
    Returns most viewed manga list
    """
    r = self.session_get(self.most_populars_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for tr_element in soup.find('table', id='mangaList').tbody.find_all('tr'):
        td_elements = tr_element.find_all('td')
        a_element = td_elements[0].a
        results.append(dict(
            slug=a_element.get('href').split('/')[-2],
            name=a_element.text.strip(),
        ))

    return results

def search(self, term):
    r = self.session_get(self.search_url, params=dict(title=term))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for tr_element in soup.find('table', id='mangaList').tbody.find_all('tr'):
        td_elements = tr_element.find_all('td')
        if td_elements[3].text.strip() == '0':
            # Skip manga with no chapters
            continue

        a_element = td_elements[0].a
        results.append(dict(
            slug=a_element.get('href').split('/')[-2],
            name=a_element.text.strip(),
        ))

    return results

def get_most_populars(self):
    """
    Returns Hot manga list
    """
    r = self.session_get(self.most_populars_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for a_element in soup.find('ul', class_='direlist').find_all('a', class_='bookname'):
        results.append(dict(
            name=a_element.text.strip(),
            slug=unquote_plus(a_element.get('href')).split('/')[-1][:-5],  # drop the 5-char '.html' extension
        ))

    return results

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages are expected.
    """
    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    data = dict(
        pages=[],
    )
    for script_element in soup.find_all('script'):
        script = script_element.string
        if not script or not script.strip().startswith('var pages'):
            continue

        pages = json.loads(script.strip().split('\n')[0].split('=')[1][:-1])
        for page in pages:
            data['pages'].append(dict(
                slug=None,  # not necessary, we know the image URL already
                image='https:{0}'.format(page['fs']),
            ))
        break

    return data

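# Illustrative sketch with a hypothetical inline script (not the real page
# content): the loop above recovers the JSON array assigned to `var pages` by
# dropping the `var pages =` prefix and the trailing semicolon, then parsing it.
import json

script = 'var pages = [{"fs": "//cdn.example.com/p1.jpg"}];'
pages = json.loads(script.strip().split('\n')[0].split('=')[1][:-1])
assert pages[0]['fs'] == '//cdn.example.com/p1.jpg'
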
def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data by scraping chapter HTML page content

    Currently, only pages are expected.
    """
    r = self.session_get(self.chapter_url.format(manga_slug, chapter_slug))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    options_elements = soup.find('select', id='page').find_all('option')

    data = dict(
        pages=[],
    )
    for option_element in options_elements:
        data['pages'].append(dict(
            slug=option_element.get('value').split('/')[-1],
            image=None,
        ))

    return data

def get_manga_chapter_page_image(self, manga_slug, manga_name, chapter_slug, page):
    """
    Returns chapter page scan (image) content
    """
    # Scrape HTML page to get image URL
    r = self.session_get(self.page_url.format(manga_slug, page['slug']))
    if r is None:
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    url = soup.find('img', id='manga_pic_1').get('src')

    # Get scan image
    r = self.session_get(url)
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if not mime_type.startswith('image'):
        return None

    return dict(
        buffer=r.content,
        mime_type=mime_type,
        name=url.split('/')[-1],
    )

def get_most_populars(self):
    """
    Returns TOP manga
    """
    r = self.session_get(self.base_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'lxml')

    results = []
    for li_element in soup.find('div', id='top_mangas_all_time').find_all('li'):
        a_element = li_element.find_all('a')[0]
        results.append(dict(
            name=a_element.text.strip(),
            slug=a_element.get('href').split('/')[-2],
        ))

    return results

def get_manga_chapter_data(self, manga_slug, manga_name, chapter_slug, chapter_url):
    """
    Returns manga chapter data

    Currently, only pages are expected.
    """
    slug = int(chapter_slug[1:-1])
    if chapter_slug[-1] != '0':
        slug = f'{slug}.{chapter_slug[-1]}'
    if chapter_slug[0] != '1':
        slug = f'{slug}-index-{chapter_slug[0]}'

    r = self.session_get(self.chapter_url.format(manga_slug, slug))
    if r is None or r.status_code != 200:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.content, 'lxml')

    chapter = None
    domain = None
    try:
        script = soup.find_all('script')[-1].string
        if script:
            for line in script.split('\n'):
                line = line.strip()
                if not line.startswith('vm.CurChapter') and not line.startswith('vm.CurPathName'):
                    continue

                if line.startswith('vm.CurChapter'):
                    chapter = json.loads(line.split('=')[1].strip()[:-1])
                elif line.startswith('vm.CurPathName'):
                    domain = line.split('=')[1].strip()[1:-2]

                if chapter is not None and domain is not None:
                    break
    except Exception as e:
        log_error_traceback(e)
        return None

    if chapter is None or domain is None:
        return None

    image_prefix = chapter_slug[1:-1]
    if chapter['Directory']:
        image_prefix = f'{chapter["Directory"]}/{image_prefix}'

    data = dict(
        pages=[],
    )
    for index in range(int(chapter['Page'])):
        data['pages'].append(dict(
            slug=None,
            image='https://{0}/manga/{1}/{2}-{3:03d}.png'.format(domain, manga_slug, image_prefix, index + 1),
        ))

    return data

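# Illustrative sketch (assumption: chapter_slug packs the chapter code as
# <index digit><zero-padded chapter number><decimal digit>, as decoded above):
def decode_chapter_code(code):
    """Rebuild the URL chapter slug from a packed chapter code."""
    slug = str(int(code[1:-1]))       # middle digits: chapter number, unpadded
    if code[-1] != '0':               # last digit: optional decimal part
        slug = f'{slug}.{code[-1]}'
    if code[0] != '1':                # first digit: season/part index, 1 = default
        slug = f'{slug}-index-{code[0]}'
    return slug

# decode_chapter_code('100105') -> '10.5'; decode_chapter_code('200420') -> '42-index-2'
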
def search(self, term):
    r = self.session_get(self.search_url, params=dict(
        tag_mode_exc='any',
        tag_mode_inc='all',
        title=term,
        s=2,
    ))
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for element in soup.find_all('a', class_='manga_title'):
        results.append(dict(
            slug=element.get('href').replace('/title/', ''),
            name=element.text.strip(),
        ))

    return results

def get_most_populars(self):
    """
    Returns most popular mangas (Bayesian rating)
    """
    r = self.session_get(self.most_populars_url)
    if r is None:
        return None

    mime_type = get_buffer_mime_type(r.content)
    if r.status_code != 200 or mime_type != 'text/html':
        return None

    soup = BeautifulSoup(r.text, 'html.parser')

    results = []
    for element in soup.find_all('a', class_='manga_title'):
        results.append(dict(
            slug=element.get('href').replace('/title/', '').split('/')[0],
            name=element.text.strip(),
        ))

    return results

def complete(manga_data, server):
    if server != self.server or manga_data['slug'] != self.manga_slug:
        return False

    self.manga_data = manga_data

    # Populate manga card
    try:
        cover_data = self.server.get_manga_cover_image(self.manga_data.get('cover'))
    except Exception as e:
        cover_data = None
        user_error_message = log_error_traceback(e)
        if user_error_message:
            self.show_notification(user_error_message)

    if cover_data is None:
        pixbuf = Pixbuf.new_from_resource_at_scale('/info/febvre/Komikku/images/missing_file.png', 174, -1, True)
    else:
        cover_stream = Gio.MemoryInputStream.new_from_data(cover_data, None)
        if get_buffer_mime_type(cover_data) != 'image/gif':
            pixbuf = Pixbuf.new_from_stream_at_scale(cover_stream, 174, -1, True, None)
        else:
            pixbuf = scale_pixbuf_animation(PixbufAnimation.new_from_stream(cover_stream), 174, -1, True, True)

    if isinstance(pixbuf, PixbufAnimation):
        self.builder.get_object('cover_image').set_from_animation(pixbuf)
    else:
        self.builder.get_object('cover_image').set_from_pixbuf(pixbuf)

    authors = html_escape(', '.join(self.manga_data['authors'])) if self.manga_data['authors'] else '-'
    self.builder.get_object('authors_value_label').set_markup('<span size="small">{0}</span>'.format(authors))

    genres = html_escape(', '.join(self.manga_data['genres'])) if self.manga_data['genres'] else '-'
    self.builder.get_object('genres_value_label').set_markup('<span size="small">{0}</span>'.format(genres))

    status = _(Manga.STATUSES[self.manga_data['status']]) if self.manga_data['status'] else '-'
    self.builder.get_object('status_value_label').set_markup('<span size="small">{0}</span>'.format(status))

    scanlators = html_escape(', '.join(self.manga_data['scanlators'])) if self.manga_data['scanlators'] else '-'
    self.builder.get_object('scanlators_value_label').set_markup('<span size="small">{0}</span>'.format(scanlators))

    self.builder.get_object('server_value_label').set_markup(
        '<span size="small"><a href="{0}">{1} [{2}]</a>\n{3} chapters</span>'.format(
            self.server.get_manga_url(self.manga_data['slug'], self.manga_data.get('url')),
            html_escape(self.server.name),
            self.server.lang.upper(),
            len(self.manga_data['chapters'])
        )
    )

    self.builder.get_object('synopsis_value_label').set_text(self.manga_data['synopsis'] or '-')

    self.activity_indicator.stop()
    self.show_page('manga')

    return False