def fetch_chapter(chapter_prefix, chapter):
    """Download one chapter page and wrap it in a ``Chapter`` object.

    The page is retrieved with ``get_chapter_html(chapter_prefix, chapter)``,
    parsed with ``get_soup`` and then handed to ``get_chapter_title`` and
    ``get_chapter_content`` to extract the two fields.

    Args:
        chapter_prefix: Passed through to ``get_chapter_html``; the
            commented-out example below passes a novel URL
            (presumably the base URL of the novel -- confirm against
            ``get_chapter_html``).
        chapter: Chapter number; also stored as ``no`` on the result.

    Returns:
        ``Chapter(no=chapter, title=..., content=...)``.
    """
    page_html = get_chapter_html(chapter_prefix, chapter)
    page = get_soup(page_html)
    return Chapter(
        no=chapter,
        title=get_chapter_title(page),
        content=get_chapter_content(page),
    )


# --- Scratch / manual-testing snippets (intentionally left commented out) ---
# novels = fetch_all_novel("http://www.truyenngan.com.vn/truyen-ngan.html")
# for novel in novels:
#     # novel.print()
#     for chapter in novel.chapters:
#         chapter.print()
#     # print("--------------------------------------------------------")

# chapter = fetch_chapter("http://truyenfull.vn/truyen-than-khong-thien-ha", 1)
# chapter.print()

# def db_save_novel(novel_id):

# def db_save_chapter(novel_id, title, content, chapter):

# html = get_html("http://truyenfull.vn/pham-nhan-tu-tien", 333)
# soup = get_soup(html)
# title = get_chapter_title(soup)
# content = get_content(soup)
# print(title)
# print(content)

# html = get_novel_html("http://truyenfull.vn/truyen-than-khong-thien-ha/")
# soup = get_soup(html)
# print(get_novel_genre(soup))
def fetch_all_novel(novel_list_url):
    """Scrape the short-story listing page into a list of ``Novel`` objects.

    Every ``div.wrap-carousel-cate`` group on the listing page becomes one
    ``Novel``; every ``<li>`` inside the group's ``<ul>`` becomes one
    ``Chapter`` of that novel, numbered from 1 in page order.

    The novel's title, image and description are taken from the first story
    of the group (the description is the text of the first ``<strong>`` on
    that story's detail page).

    Args:
        novel_list_url: URL of the listing page, passed to ``get_html``.

    Returns:
        A list of ``Novel`` objects, each with a populated ``chapters`` list.
        Groups that contain no stories are skipped.
    """
    base_url = "http://www.truyenngan.com.vn/"
    listing = get_soup(get_html(novel_list_url))
    novels = []

    for group in listing.find_all("div", "wrap-carousel-cate"):
        story_list = group.find("ul")
        stories = story_list.find_all("li") if story_list is not None else []
        if not stories:
            # Nothing to build a novel from; previously this raised
            # IndexError on ``story_bsoups[0]``.
            continue

        novel = None
        for no, story in enumerate(stories, start=1):
            image_link = story.find("div", "carousel-cate-img").a
            title = story.find("div", "carousel-cate-title").a.get_text()
            detail = get_soup(get_html(base_url + image_link["href"]))

            if novel is None:
                # The first story doubles as the novel's metadata source, so
                # its detail page is fetched only once and reused below.
                description = detail.find("strong").get_text()
                novel = Novel(
                    title, "", description, image_link.img["src"], "Truyện ngắn"
                )
                novel.chapters = []
                novels.append(novel)

            author = detail.find("div", "details-poster").a.strong.get_text()
            content = str(detail.find("div", "maincontent"))
            novel.chapters.append(Chapter(no, title + " - " + author, content))

    return novels
def create_chapter(uid, name, mid):
    """Create a chapter under a manga together with its default quests.

    Args:
        uid: Primary key of the acting user (looked up via ``User.query.get``).
        name: Stored as ``chapter_name`` on the new chapter.
        mid: Primary key of the manga the chapter belongs to (``aff_mid``).

    Returns:
        int status code:
            0 -- chapter and default quests were created;
            1 -- the user does not exist or lacks the ``operate_chapter``
                 privilege;
            2 -- no manga with id ``mid`` exists.
    """
    # Imported at call time, as in the original (presumably to avoid an
    # import cycle with the models package -- confirm).
    from models import db
    from models.user import User
    from models.manga import Manga
    from models.chapter import Chapter

    # Check user privilege. ``get`` yields None for an unknown id; treat that
    # as "not allowed" instead of failing with AttributeError.
    user = User.query.get(uid)
    if user is None or not user.privilege.operate_chapter:
        return 1

    # Check manga existence.
    if Manga.query.get(mid) is None:
        return 2

    # Create the chapter; commit before use so ``new_chapter.cid`` is set.
    new_chapter = Chapter(chapter_name=name, aff_mid=mid)
    db.session.add(new_chapter)
    db.session.commit()
    update_chapter(new_chapter)

    # Create the three default quests. Only the translation quest is created
    # without ``isClosed=True``; the return values of ``create_quest`` are
    # not inspected (same as before).
    create_quest(uid, "翻译", "TRANSLATION", False, new_chapter.cid)
    create_quest(uid, "校对", "PROOFREADING", False, new_chapter.cid,
                 isClosed=True)
    create_quest(uid, "嵌字", "TYPESETTING", False, new_chapter.cid,
                 isClosed=True)
    return 0
def __load_chapters(self, chapters_texts):
    """Turn raw chapter strings into ``Chapter`` objects.

    A stanza pipeline for ``self.language`` is created on first use and
    stored in ``self.pipeline`` under that language key.

    Args:
        chapters_texts: Iterable of strings. Each string is split at its
            first newline: the part before is the title, the rest is the
            body that is run through the pipeline.

    Returns:
        A list of ``Chapter(model, title, language)`` in input order.

    Raises:
        ValueError: If a chapter string contains no newline (the
            two-way unpacking of the split fails).
    """
    lang = self.language
    if lang not in self.pipeline:
        self.pipeline[lang] = stanza.Pipeline(
            **DocumentParser.PIPELINE_CFG, lang=lang)

    chapters = []
    for raw in chapters_texts:
        heading, body = raw.split('\n', 1)
        parsed = self.pipeline[lang](body)
        chapters.append(Chapter(parsed, heading, lang))
    return chapters
def begin_new_chapter(self, chapter_number_str):
    """Close the chapter in progress (if any) and start a new one.

    Args:
        chapter_number_str: Chapter number as text; converted with ``int``
            before anything else happens, so an invalid value raises
            ``ValueError`` without touching the current state.

    Side effects:
        A truthy ``self.current_chapter`` is handed to
        ``self.current_book.add_chapter``; ``self.current_chapter`` is then
        replaced by a fresh ``Chapter`` carrying the parsed number.
    """
    number = int(chapter_number_str)

    finished = self.current_chapter
    if finished:
        self.current_book.add_chapter(finished)

    self.current_chapter = Chapter(number)
def parse_chapter(self, text, title=None):
    """Run ``text`` through the pipeline and wrap the result in a ``Chapter``.

    Args:
        text: Chapter body passed to ``self.pipeline``.
        title: Optional title. Any falsy value (``None`` or ``""``) falls
            back to ``self.chapter_title``.

    Returns:
        ``Chapter(model, title, self.lang)``.
    """
    parsed = self.pipeline(text)
    effective_title = title or self.chapter_title
    return Chapter(parsed, effective_title, self.lang)