import re

import utils  # site-local helper module (get_parsed, index-store helpers)

# `site`, `site_index`, and `site_folder` are expected module-level constants.


def _get_pages(start_page_url):
    """Yield (page number, image URL) for each page of a chapter, following the 'next' link."""
    count = 1
    soup = utils.get_parsed(site + start_page_url)
    img = soup.find("img")['src']
    next_page = soup.find('span', {'class': 'next'}).findChild()['href']
    yield count, img
    while next_page.startswith(start_page_url):
        print(count, end=' ')
        count += 1
        soup = utils.get_parsed(site + next_page)
        img = soup.find("img")['src']
        next_page = soup.find('span', {'class': 'next'}).findChild()['href']
        yield count, img
def _get_pages(start_page_url):
    """Yield (page number, image URL) for each page of a chapter, following the 'nextLink' anchor."""
    chapter_url = start_page_url.replace('/p1', '')
    count = 1
    soup = utils.get_parsed(start_page_url)
    img = _find_img(soup)
    next_page = soup.find('a', {'class': 'nextLink'})['href']
    yield count, img
    while next_page.startswith(chapter_url):
        print(count, end=' ')
        count += 1
        soup = utils.get_parsed(next_page)
        img = _find_img(soup)
        next_page = soup.find('a', {'class': 'nextLink'})['href']
        yield count, img
def get_index(): soup = utils.get_parsed(site_index) index = {} links = soup.find_all("a", {"class": "popupLink"}) for link in links: url = link.get('href') index[link.string] = url return index
def get_index(): soup = utils.get_parsed(site_index) index = {} divs = soup.find_all("div", {"class": "series_alpha"}) for div in divs: for link in div.find_all('a'): url = link.get('href') if url and url.startswith('/') and len(url) > 1: index[link.string] = url return index
def get_chapters(url, name):
    """Return a {chapter number: URL} mapping, reusing the stored index when one exists."""
    chapters_index_name = utils.index_location_format % re.sub(
        r'[ -/]', '_', name.lower())
    chapters = utils.get_index_from_store(site_folder, chapters_index_name)
    if chapters:
        return {int(chapter): url for chapter, url in chapters.items()}
    soup = utils.get_parsed(site + url)
    div = soup.find(id='chapterlist')
    chapters = {}
    for link in div.find_all('a'):
        url = link.get('href')
        if url and url.startswith('/') and len(url) > 1:
            chapters[int(link.string.replace(name, '').strip())] = url
    utils.store_index(chapters, site_folder, chapters_index_name)
    return chapters
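# Usage sketch (illustrative only, not part of the scraper modules above): a
# hypothetical driver showing how the pieces might chain together, assuming the
# per-site constants `site`, `site_index`, and `site_folder` are configured.
def _example_fetch_first_chapter():
    index = get_index()                        # series name -> series URL
    name, series_url = next(iter(index.items()))
    chapters = get_chapters(series_url, name)  # chapter number -> chapter URL
    first_chapter_url = chapters[min(chapters)]
    for page_number, image_url in _get_pages(first_chapter_url):
        print(page_number, image_url)          # one image URL per page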