def extract_chapters(self):
    """Collect the chapter entries listed on the parsed manga page.

    Reads every ``<li>`` inside the ``<ul class="list-chapter">`` element of
    ``self.soup`` and builds one dict per item with the keys:

    * ``chapter_name`` -- text of the item's ``<span class="chapter-text">``
    * ``chapter_url`` -- ``href`` attribute of the item's first ``<a>``
    * ``b64_hash`` -- result of ``Magna.encode_base64(href=...)`` for that
      same href (used for URL purposes, per the original comment)

    Returns:
        list: the chapter dicts in the reverse of the order they appear on
        the page.
    """
    chapter_container = self.soup.find("ul", class_="list-chapter")

    chapters = []
    for chapter in chapter_container.find_all("li"):
        chapter_name = chapter.find("span", class_="chapter-text").get_text()
        # Look the link up once; it feeds both the URL and its encoded form.
        chapter_url = chapter.find("a")["href"]
        chapters.append(
            {
                "chapter_name": chapter_name,
                "chapter_url": chapter_url,
                "b64_hash": Magna.encode_base64(href=chapter_url),
            }
        )

    # list.reverse() works in place and returns None, so returning its result
    # directly would hand callers None instead of the list.
    chapters.reverse()
    return chapters
def extract_chapters(self):
    """Build the chapter list from the table inside ``div#tab-chapper``.

    Each ``<tr>`` of that table yields one dict with the keys:

    * ``chapter_name`` -- text of the ``<b>`` nested in the row's first ``<a>``
    * ``chapter_url`` -- ``self.initial`` prepended to that link's ``href``
    * ``b64_hash`` -- result of ``Magna.encode_base64(href=...)`` for the
      full chapter URL (used for URL purposes, per the original comment)

    Returns:
        list: the chapter dicts, in the order the rows appear on the page.
    """
    table = self.soup.find("div", id="tab-chapper").find("table")

    entries = []
    for row in table.find_all("tr"):
        anchor = row.find("a")
        title = anchor.find("b").get_text()
        # The href is joined onto self.initial to form the stored URL.
        url = self.initial + anchor["href"]
        entries.append(
            {
                "chapter_name": title,
                "chapter_url": url,
                "b64_hash": Magna.encode_base64(href=url),
            }
        )
    return entries
def extract_chapters(self):
    """Build the chapter list from the body of ``table#chapter_table``.

    Each ``<tr>`` in the table's ``<tbody>`` yields one dict with the keys:

    * ``chapter_name`` -- text of the row's ``<td id="chapter-part">``
    * ``chapter_url`` -- ``self.base_url`` prepended to the row's
      ``data-href`` attribute
    * ``b64_hash`` -- result of ``Magna.encode_base64(href=...)`` for the
      full chapter URL (used for URL purposes, per the original comment)

    Returns:
        list: the chapter dicts, in the order the rows appear on the page.
    """

    def build_entry(row):
        # The link lives on the row itself (data-href), not on an <a> tag.
        title = row.find("td", id="chapter-part").get_text()
        url = self.base_url + row["data-href"]
        return {
            "chapter_name": title,
            "chapter_url": url,
            "b64_hash": Magna.encode_base64(href=url),
        }

    table_body = self.soup.find("table", id="chapter_table").find("tbody")
    return [build_entry(row) for row in table_body.find_all("tr")]
def extract_chapters(self):
    """Build the chapter list from every ``<ul class="chapter">`` on the page.

    The page may hold several such lists (the original comment notes a manga
    can contain volumes), so all of them are walked in document order. Each
    ``<li>`` yields one dict with the keys:

    * ``chapter_name`` -- text of the item's first ``<a>``
    * ``chapter_url`` -- ``self.base_url`` prepended to the ``href`` of the
      item's last ``<a>``
    * ``b64_hash`` -- result of ``Magna.encode_base64(href=...)`` for the
      full chapter URL (used for URL purposes, per the original comment)

    Returns:
        list: the chapter dicts, in the order the items appear on the page.
    """
    entries = []
    for volume_list in self.soup.find_all("ul", class_="chapter"):
        for item in volume_list.find_all("li"):
            # The last link in the item supplies the URL; the first supplies
            # the displayed name.
            target_link = item.find_all("a")[-1]
            title = item.find("a").get_text()
            url = self.base_url + target_link["href"]
            entries.append(
                {
                    "chapter_name": title,
                    "chapter_url": url,
                    "b64_hash": Magna.encode_base64(href=url),
                }
            )
    return entries