def parse_page(self):
    """Extract the chapter title and body from ``self.page`` and store both.

    The title is the text of the ``<h1>`` inside ``div.bookname`` and the
    body is the text of ``div#content``; both are looked up inside the
    ``div.box_con`` container.  ``self.chapter`` is handed to
    ``write_model`` first, because ``self.text.chapter_id`` is taken from
    ``get_id(self.chapter)`` before ``self.text`` is written.
    """
    container = self.page.find("div", class_="box_con")

    title_holder = container.find("div", class_="bookname")
    self.chapter.name = title_holder.find("h1").text
    write_model(self.chapter)

    # The text record references the chapter through its id.
    self.text.chapter_id = get_id(self.chapter)
    content_node = container.find("div", id="content")
    self.text.text = content_node.text
    write_model(self.text)
def write_novel(novel):
    """Insert or update *novel* and return ``get_id(novel)``.

    A novel whose ``id`` is falsy is passed to ``insert_novel``; otherwise
    it is passed to ``update_novel``.

    Raises:
        TypeError: if *novel* is not a ``Novel`` instance.
    """
    if isinstance(novel, Novel):
        # Pick the persistence routine from whether an id is already set.
        persist = update_novel if novel.id else insert_novel
        persist(novel)
        return get_id(novel)
    raise TypeError("Cannot write %s into database!" % type(novel))
def write_category(category):
    """Insert or update *category* and return ``get_id(category)``.

    A category whose ``id`` is falsy is passed to ``insert_category``;
    otherwise it is passed to ``update_category``.

    Raises:
        TypeError: if *category* is not a ``Category`` instance.
    """
    if isinstance(category, Category):
        # Pick the persistence routine from whether an id is already set.
        persist = update_category if category.id else insert_category
        persist(category)
        return get_id(category)
    raise TypeError("Cannot write %s into database!" % type(category))
def write_chapter(chapter):
    """Insert or update *chapter* and return ``get_id(chapter)``.

    A chapter whose ``id`` is falsy is passed to ``insert_chapter``;
    otherwise it is passed to ``update_chapter``.

    Raises:
        TypeError: if *chapter* is not a ``Chapter`` instance.
    """
    if isinstance(chapter, Chapter):
        # Pick the persistence routine from whether an id is already set.
        persist = update_chapter if chapter.id else insert_chapter
        persist(chapter)
        return get_id(chapter)
    raise TypeError("Cannot write %s into database!" % type(chapter))
def write_author(author):
    """Insert or update *author* and return ``get_id(author)``.

    An author whose ``id`` is falsy is passed to ``insert_author``;
    otherwise it is passed to ``update_author``.

    Raises:
        TypeError: if *author* is not an ``Author`` instance.
    """
    if isinstance(author, Author):
        # Pick the persistence routine from whether an id is already set.
        persist = update_author if author.id else insert_author
        persist(author)
        return get_id(author)
    raise TypeError("Cannot write %s into database!" % type(author))
def parse_novel_data(self):
    """Populate ``self.author`` and ``self.novel`` from the page and store them.

    The element with ``id="info"`` supplies the novel title (its ``<h1>``)
    and a list of ``<p>`` labels of the form ``label:value``:

    * ``p_labels[0]`` -> author name
    * ``p_labels[1]`` -> state (only the part before the first ``","``)
    * ``p_labels[2]`` -> last update date

    The description comes from the ``<p>`` inside ``id="intro"`` and the
    cover path from the ``src`` of the ``<img>`` inside ``id="fmimg"``.
    ``self.category.id`` and ``self.url`` are copied onto the novel.

    For both the author and the novel, ``get_id`` is tried first; when it
    raises ``IndexError`` the model is passed to ``write_model`` and
    ``get_id`` is called again to obtain the id.

    Raises:
        IndexError: if a label paragraph contains no ``":"`` or fewer than
            three ``<p>`` labels are present.
    """
    info = self.page.find(id="info")
    p_labels = info.find_all("p")

    # Split on the FIRST colon only: a value that itself contains ":"
    # (e.g. a timestamp with a time of day) must not be truncated.
    self.author.name = p_labels[0].text.split(":", 1)[1]
    try:
        # Assign directly instead of looking the id up a second time,
        # mirroring the novel lookup at the end of this method.
        self.author.id = get_id(self.author)
    except IndexError:
        write_model(self.author)
        self.author.id = get_id(self.author)

    self.novel.author_id = self.author.id
    self.novel.category_id = self.category.id
    self.novel.name = info.find("h1").text
    self.novel.state = p_labels[1].text.split(":", 1)[1].split(",")[0]
    self.novel.last_update_date = p_labels[2].text.split(":", 1)[1]
    # self.novel.last_update_chapter = p_labels[3].find("a").text
    self.novel.description = self.page.find(
        id="intro").find("p").text.replace("<br>", "\n")
    self.novel.image_path = self.page.find(id="fmimg").find("img")['src']
    self.novel.download_from = self.url

    try:
        self.novel.id = get_id(self.novel)
    except IndexError:
        write_model(self.novel)
        self.novel.id = get_id(self.novel)
def process_data(self):
    """Pass every category to ``write_model`` and record its id.

    Iterates ``self.category_list`` in order; after each ``write_model``
    call the entry's ``id`` attribute is set from ``get_id``.
    """
    for entry in self.category_list:
        write_model(entry)
        entry.id = get_id(entry)